diff --git a/.gitlab-ci-gputest.yml b/.gitlab-ci-gputest.yml index c3d00113c..6594d88fd 100644 --- a/.gitlab-ci-gputest.yml +++ b/.gitlab-ci-gputest.yml @@ -1,14 +1,23 @@ -test:rocm241: - extends: .unittest +test:vega20: + extends: .test + dependencies: + - build:rocm tags: - - tag241 + - vega20 + - rocm -test:rocm243: - extends: .unittest +test:s9300: + extends: .test + dependencies: + - build:rocm tags: - - tag243 + - s9300 + - rocm -test:rocm244: - extends: .unittest +test:mi25: + extends: .test + dependencies: + - build:rocm tags: - - tag244 + - mi25 + - rocm diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7c9ed50c2..ef5232ac8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,40 +45,39 @@ before_script: - export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS - export CMAKE_OPTIONS=$CXXFLAGS" "$LOCAL_CMAKE_OPTIONS -build:rocm: +.build: stage: build - tags: - - rocm variables: SUDO_CMD: "sudo -E" - script: - - mkdir build - - cd build - - CXX=hcc cmake -DBUILD_TEST=ON -DBUILD_EXAMPLE=ON -DBUILD_BENCHMARK=ON ../. - - make -j16 - - make package artifacts: paths: - build/library/ - build/test/test_* - build/test/rocprim/test_* - - build/test/hipcub/test_* - build/test/CTestTestfile.cmake - build/test/rocprim/CTestTestfile.cmake - - build/test/hipcub/CTestTestfile.cmake - build/gtest/ - build/CMakeCache.txt - build/CTestTestfile.cmake - build/rocprim*.deb - build/rocprim*.zip expire_in: 2 weeks + +build:rocm: + extends: .build + tags: + - rocm + script: + - mkdir build + - cd build + - CXX=hcc cmake -DBUILD_TEST=ON -DBUILD_EXAMPLE=ON -DBUILD_BENCHMARK=ON ../. + - make -j16 + - make package -.unittest: +.test: stage: test variables: SUDO_CMD: "sudo -E" - dependencies: - - build:rocm script: - cd build - $SUDO_CMD ctest --output-on-failure --repeat-until-fail 2 diff --git a/CMakeLists.txt b/CMakeLists.txt index 42be31b77..768ab8278 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,14 +76,8 @@ set(AMDGPU_TARGETS gfx803;gfx900;gfx906 CACHE STRING "List of specific machine t include(cmake/Summary.cmake) print_configuration_summary() -# rocPRIM works only on hcc -if(HIP_PLATFORM STREQUAL "hcc") - # rocPRIM library - add_subdirectory(rocprim) -endif() - -# hipCUB library -add_subdirectory(hipcub) +# rocPRIM library +add_subdirectory(rocprim) # Tests if(BUILD_TEST AND NOT ONLY_INSTALL) @@ -91,28 +85,20 @@ if(BUILD_TEST AND NOT ONLY_INSTALL) add_subdirectory(test) endif() -# Benchmarks and examples are only for rocPRIM -if(HIP_PLATFORM STREQUAL "hcc") - # Benchmarks - if(BUILD_BENCHMARK AND NOT ONLY_INSTALL) - add_subdirectory(benchmark) - endif() - - # Examples - if(BUILD_EXAMPLE AND NOT ONLY_INSTALL) - add_subdirectory(example) - endif() +# Benchmarks +if(BUILD_BENCHMARK AND NOT ONLY_INSTALL) + add_subdirectory(benchmark) +endif() + +# Examples +if(BUILD_EXAMPLE AND NOT ONLY_INSTALL) + add_subdirectory(example) endif() # Package -if(HIP_PLATFORM STREQUAL "hcc") - set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_hcc (>= 1.5.18263)") # 1.5.18263 is HIP version in ROCm 1.8.2 - set(CPACK_RPM_PACKAGE_REQUIRES "hip_hcc >= 1.5.18263") -else() - set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_nvcc (>= 1.5.18263)") - set(CPACK_RPM_PACKAGE_REQUIRES "hip_nvcc >= 1.5.18263") -endif() +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_hcc (>= 1.5.18263)") # 1.5.18263 is HIP version in ROCm 1.8.2 +set(CPACK_RPM_PACKAGE_REQUIRES "hip_hcc >= 1.5.18263") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") if(NOT CPACK_PACKAGING_INSTALL_PREFIX) set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") @@ -120,17 +106,8 @@ endif() set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" "\${CPACK_PACKAGING_INSTALL_PREFIX}/include" ) -# For CUDA backend the package contains only hipcub -if(HIP_PLATFORM STREQUAL "hcc") - rocm_create_package( - NAME rocprim - DESCRIPTION "Radeon Open Compute Parallel Primitives Library" - MAINTAINER "Stream HPC Maintainers " - ) -else() - rocm_create_package( - NAME rocprim-hipcub - DESCRIPTION "Radeon Open Compute Parallel Primitives Library (hipCUB only)" - MAINTAINER "Stream HPC Maintainers " - ) -endif() +rocm_create_package( + NAME rocprim + DESCRIPTION "Radeon Open Compute Parallel Primitives Library" + MAINTAINER "Stream HPC Maintainers " +) \ No newline at end of file diff --git a/README.md b/README.md index 24c5e77bc..a90875639 100644 --- a/README.md +++ b/README.md @@ -63,13 +63,12 @@ define `ROCPRIM_HC_API` before the `#include` statement. Alternatively, user can #include // HC API ``` -Recommended way of including rocPRIM or hipCUB into a CMake project is by using their package -configuration files. hipCUB package name is `hipcub`, rocPRIM package name is `rocprim`. +Recommended way of including rocPRIM into a CMake project is by using its package +configuration files. rocPRIM package name is `rocprim`. ```cmake # "/opt/rocm" - default install prefix find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") -find_package(hipcub REQUIRED CONFIG PATHS "/opt/rocm/hipcub") ... @@ -80,12 +79,8 @@ target_link_libraries( roc::rocprim) # Includes rocPRIM headers and required HC dependencies target_link_libraries( roc::rocprim_hc) -# Includes rocPRIM headers and required HIP dependencies +# Includes rocPRIM headers and required HIP dependencies target_link_libraries( roc::rocprim_hip) - -# On ROCm: includes hipCUB headers and roc::rocprim_hip target -# On CUDA: includes only hipCUB headers, user has to include CUB directory -target_link_libraries( hip::hipcub) ``` ## Running Unit Tests @@ -97,9 +92,6 @@ cd rocPRIM; cd build # To run all tests ctest -# To run unit tests for hipCUB -./test/hipcub/ - # To run unit tests for rocPRIM ./test/rocprim/ ``` @@ -150,9 +142,16 @@ cd rocPRIM; cd doc doxygen Doxyfile # open html/index.html - ``` +## hipCUB + +hipCUB is a thin wrapper library on top of [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM) or +[CUB](https://github.com/NVlabs/cub). It enables developers to port project that uses CUB library to the +[HIP](https://github.com/ROCm-Developer-Tools/HIP) layer and to run them on AMD hardware. In [ROCm](https://rocm.github.io/) +environment hipCUB uses rocPRIM library as the backend, however, on CUDA platforms it uses CUB instead. + + ## Support Bugs and feature requests can be reported through [the issue tracker](https://github.com/ROCmSoftwarePlatform/rocPRIM/issues). diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 47d7791bf..b3046688d 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -42,36 +42,7 @@ find_package(Git REQUIRED) if (NOT Git_FOUND) message(FATAL_ERROR "Please ensure Git is installed on the system") endif() - -# CUB (only for CUDA platform) -if(HIP_PLATFORM STREQUAL "nvcc") - if(NOT DEFINED CUB_INCLUDE_DIR) - file( - DOWNLOAD https://github.com/NVlabs/cub/archive/1.8.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip - STATUS cub_download_status LOG cub_download_log - ) - list(GET cub_download_status 0 cub_download_error_code) - if(cub_download_error_code) - message(FATAL_ERROR "Error: downloading " - "https://github.com/NVlabs/cub/archive/1.8.0.zip failed " - "error_code: ${cub_download_error_code} " - "log: ${cub_download_log} " - ) - endif() - - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE cub_unpack_error_code - ) - if(cub_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip failed") - endif() - set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0/ CACHE PATH "") - endif() -endif() - + # Test dependencies if(BUILD_TEST) # Google Test (https://github.com/google/googletest) diff --git a/cmake/SetupNVCC.cmake b/cmake/SetupNVCC.cmake deleted file mode 100644 index aea66331a..000000000 --- a/cmake/SetupNVCC.cmake +++ /dev/null @@ -1,102 +0,0 @@ -# MIT License -# -# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# Find HIP package and verify that correct C++ compiler was selected for available -# platfrom. On ROCm platform host and device code is compiled by the same compiler: -# hcc. On CUDA host can be compiled by any C++ compiler while device code is compiled -# by nvcc compiler (CMake's CUDA package handles this). - -# A function for automatic detection of the lowest CC of the installed NV GPUs -function(hip_cuda_detect_lowest_cc out_variable) - set(__cufile ${PROJECT_BINARY_DIR}/detect_nvgpus_cc.cu) - - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " int major = 1000;\n" - " int minor = 1000;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " if (prop.major < major || (prop.major == major && prop.minor < minor)){\n" - " major = prop.major; minor = prop.minor;\n" - " }\n" - " }\n" - " std::printf(\"%d%d\", major, minor);\n" - " return 0;\n" - "}\n") - - execute_process( - COMMAND ${HIP_HIPCC_EXECUTABLE} "-Wno-deprecated-gpu-targets" "--run" "${__cufile}" - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ) - - if(__nvcc_res EQUAL 0) - set(HIP_CUDA_lowest_cc ${__nvcc_out} CACHE INTERNAL "The lowest CC of installed NV GPUs" FORCE) - endif() - - if(NOT HIP_CUDA_lowest_cc) - set(HIP_CUDA_lowest_cc "20") - set(${out_variable} ${HIP_CUDA_lowest_cc} PARENT_SCOPE) - else() - set(${out_variable} ${HIP_CUDA_lowest_cc} PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Non macro/function section -################################################################################################ - -# Get CUDA -find_package(CUDA REQUIRED) - -# Finds lowest supported CUDA CC -# -# Use NVGPU_TARGETS to set CUDA arch compilation flags -# For example: -DNVGPU_TARGETS="--gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52" -set(HIP_NVCC_FLAGS " ${HIP_NVCC_FLAGS} -Wno-deprecated-gpu-targets") # Suppressing warnings -if("x${NVGPU_TARGETS}" STREQUAL "x") - hip_cuda_detect_lowest_cc(lowest_cc) - set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} --gpu-architecture=sm_${lowest_cc}") -else() - set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} ${NVGPU_TARGETS}") -endif() - -# Add HIP flags/options/includes to CUDA_NVCC_FLAGS -execute_process( - COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --cpp_config - OUTPUT_VARIABLE HIP_CPP_CONFIG_FLAGS - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE -) -string(REPLACE " " ";" HIP_CPP_CONFIG_FLAGS ${HIP_CPP_CONFIG_FLAGS}) -list(APPEND CUDA_NVCC_FLAGS "-std=c++11 ${HIP_CPP_CONFIG_FLAGS} ${HIP_NVCC_FLAGS}") - -# Ignore warnings about #pragma unroll -# and about deprecated CUDA function(s) used in hip/nvcc_detail/hip_runtime_api.h -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-deprecated-declarations") diff --git a/cmake/VerifyCompiler.cmake b/cmake/VerifyCompiler.cmake index 66baa92df..2b706a67c 100644 --- a/cmake/VerifyCompiler.cmake +++ b/cmake/VerifyCompiler.cmake @@ -21,17 +21,12 @@ # SOFTWARE. # Find HIP package and verify that correct C++ compiler was selected for available -# platfrom. On ROCm platform host and device code is compiled by the same compiler: -# hcc. On CUDA host can be compiled by any C++ compiler while device code is compiled -# by nvcc compiler (CMake's CUDA package handles this). +# platfrom. On ROCm platform host and device code is compiled by the same compiler: hcc. # Find HIP package find_package(HIP 1.5.18263 REQUIRED) # 1.5.18263 is HIP version in ROCm 1.8.2 -if(HIP_PLATFORM STREQUAL "nvcc") - include(cmake/SetupNVCC.cmake) - message(STATUS "rocPRIM does not support NVCC. Only hipCUB will be available.") -elseif(HIP_PLATFORM STREQUAL "hcc") +if(HIP_PLATFORM STREQUAL "hcc") if(NOT (CMAKE_CXX_COMPILER MATCHES ".*/hcc$" OR CMAKE_CXX_COMPILER MATCHES ".*/hipcc$")) message(FATAL_ERROR "On ROCm platform 'hcc' or 'clang' must be used as C++ compiler.") else() @@ -63,5 +58,5 @@ elseif(HIP_PLATFORM STREQUAL "hcc") find_package(hip REQUIRED CONFIG PATHS /opt/rocm) endif() else() - message(FATAL_ERROR "HIP_PLATFORM must be 'hcc' or 'clang' (AMD ROCm platform) or `nvcc` (NVIDIA CUDA platform).") + message(FATAL_ERROR "HIP_PLATFORM must be 'hcc' or 'clang' (AMD ROCm platform)") endif() diff --git a/doc/hipcub.dox b/doc/hipcub.dox deleted file mode 100644 index c8fff4875..000000000 --- a/doc/hipcub.dox +++ /dev/null @@ -1,47 +0,0 @@ -/** -@brief hipCUB Library -@author -@file -*/ - -/** - * \defgroup hipcub hipCUB Library - * @{ - * hipCUB is a thin wrapper library on top of which enables developers to port project using CUB library to the - * [HIP](https://github.com/ROCm-Developer-Tools/HIP) layer and to run them on AMD hardware. In [ROCm](https://rocm.github.io/) - * environment hipCUB uses rocPRIM library as the backend, however, on CUDA platforms it uses CUB instead. - * - * @page hipcub_notes_page General Notes - * - * * When using hipCUB you should only include `hipcub.hpp` header. - * * When rocPRIM HIP API is used as backend `HIPCUB_ROCPRIM_API` is defined. - * * When CUB is used as backend `HIPCUB_CUB_API` is defined. - * * Backends are automaticaly selected based on platform detected by HIP layer (`__HIP_PLATFORM_HCC__`, `__HIP_PLATFORM_NVCC__`). - * - * @page hipcub_rocprim_page rocPRIM backend - * hipCUB with rocPRIM HIP API backend may not support all function and features CUB has because of the differences - * between ROCm (HIP) platform and CUDA platform. - * - * Not-supported features and differences: - * - * * Functions, classes and macros which are not in the public API or not documented are not supported. - * * Device-wide primitives can't be called from kernels (dynamic parallelism is not supported in HIP/HC). - * * `DeviceSpmv` is not supported. - * * Fancy iterators: `CacheModifiedInputIterator`, `CacheModifiedOutputIterator`, and `TexRefInputIterator` - * are not supported. - * * Thread I/O: - * * `CacheLoadModifier`, `CacheStoreModifier` cache modifiers are not supported. - * * `ThreadLoad`, `ThreadStore` functions are not supported. - * * Storage management and debug functions: - * * `CachingDeviceAllocator` class is not supported. - * * `Debug`, `PtxVersion`, `SmVersion` functions and `CubDebug`, `CubDebugExit`, `_CubLog` macros are not supported. - * * Intrinsics: - * * `ThreadExit`, `ThreadTrap` - not supported. - * * Warp thread masks (when used) are 64-bit unsigned integers. - * * `member_mask` input argument is ignored in `WARP_*` functions. - * * Arguments `first_lane`, `last_lane`, and `member_mask` are ignored in `Shuffle*` functions. - * * Utilities: - * * `SwizzleScanOp`, `ReduceBySegmentOp`, `ReduceByKeyOp`, CastOp` - not supported. - * - * @} - */ \ No newline at end of file diff --git a/hipcub/CMakeLists.txt b/hipcub/CMakeLists.txt deleted file mode 100644 index ad860b49e..000000000 --- a/hipcub/CMakeLists.txt +++ /dev/null @@ -1,98 +0,0 @@ -# MIT License -# -# Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# hipCUB header-only library - -# Configure a header file to pass the hipCUB version -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/include/hipcub/hipcub_version.hpp.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/hipcub/hipcub_version.hpp" - @ONLY -) - -# Only header target, does not include dependencies -add_library(hipcub INTERFACE) -target_include_directories(hipcub - INTERFACE - $ - $ - $ - $ -) - -if(HIP_PLATFORM STREQUAL "hcc") - target_link_libraries(hipcub - INTERFACE - rocprim_hip - ) -else() - # On CUDA platform we don't want to export hipcub targer with - # INTERFACE_INCLUDE_DIRECTORIES property containing a path to - # CUB library headers. - # hipcub_cub target is only for internal use. - add_library(hipcub_cub INTERFACE) - target_link_libraries(hipcub_cub - INTERFACE - hipcub - ) - target_include_directories(hipcub_cub SYSTEM - INTERFACE - ${CUB_INCLUDE_DIR} - ) -endif() - -# Installation - -include(GNUInstallDirs) -set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) - -# We need to install headers manually as rocm_install_targets -# does not support header-only libraries (INTERFACE targets) -rocm_install_targets( - TARGETS hipcub - EXPORT hipcub-targets -# INCLUDE -# ${CMAKE_SOURCE_DIR}/hipcub/include -# ${CMAKE_BINARY_DIR}/hipcub/include - PREFIX hipcub -) -install( - DIRECTORY - "include/" - "${PROJECT_BINARY_DIR}/hipcub/include/" - DESTINATION hipcub/include/ - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.hpp" - PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ -) - -# Export targets -rocm_export_targets( - TARGETS hipcub-targets - NAME hipcub - PREFIX hipcub - NAMESPACE hip:: -) - -# Create symlinks -rocm_install_symlink_subdir(hipcub) diff --git a/hipcub/include/hipcub/config.hpp b/hipcub/include/hipcub/config.hpp deleted file mode 100644 index 7eede8455..000000000 --- a/hipcub/include/hipcub/config.hpp +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CONFIG_HPP_ -#define HIPCUB_CONFIG_HPP_ - -#include - -#define HIPCUB_NAMESPACE hipcub - -#define BEGIN_HIPCUB_NAMESPACE \ - namespace hipcub { - -#define END_HIPCUB_NAMESPACE \ - } /* hipcub */ - -#ifdef __HIP_PLATFORM_HCC__ - #ifndef ROCPRIM_HIP_API - #define ROCPRIM_HIP_API - #endif // ROCPRIM_HIP_API - #include - - #define HIPCUB_ROCPRIM_API 1 - #define HIPCUB_DEVICE __device__ - #define HIPCUB_HOST __host__ - #define HIPCUB_HOST_DEVICE __host__ __device__ - #define HIPCUB_RUNTIME_FUNCTION __host__ - #define HIPCUB_SHARED_MEMORY __shared__ -#elif defined(__HIP_PLATFORM_NVCC__) - // Block - #include - #include - #include - #include - #include - #include - #include - #include - #include - - // Thread - #include - #include - #include - #include - #include - - // Warp - #include - #include - - // Iterator - #include - #include - #include - #include - #include - #include - #include - #include - - // Util - #include - #include - #include - #include - #include - #include - - #define HIPCUB_CUB_API 1 - #define HIPCUB_DEVICE __device__ - #define HIPCUB_HOST __host__ - #define HIPCUB_HOST_DEVICE __host__ __device__ - #define HIPCUB_RUNTIME_FUNCTION CUB_RUNTIME_FUNCTION - #define HIPCUB_SHARED_MEMORY __shared__ -#endif - -#endif // HIPCUB_CONFIG_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_histogram.hpp b/hipcub/include/hipcub/cub/device/device_histogram.hpp deleted file mode 100644 index 5096c7f91..000000000 --- a/hipcub/include/hipcub/cub/device/device_histogram.hpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_HISTOGRAM_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_HISTOGRAM_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceHistogram -{ - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_samples, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::HistogramEven( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, lower_level, upper_level, - num_samples, - stream, debug_synchronous - ) - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::HistogramEven( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, lower_level, upper_level, - num_row_samples, num_rows, row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT lower_level[NUM_ACTIVE_CHANNELS], - LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::MultiHistogramEven( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, lower_level, upper_level, - num_pixels, - stream, debug_synchronous - ) - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT lower_level[NUM_ACTIVE_CHANNELS], - LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::MultiHistogramEven( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, lower_level, upper_level, - num_row_pixels, num_rows, row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT * d_levels, - OffsetT num_samples, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::HistogramRange( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, d_levels, - num_samples, - stream, debug_synchronous - ) - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT * d_levels, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::HistogramRange( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, d_levels, - num_row_samples, num_rows, row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT * d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::MultiHistogramRange( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, d_levels, - num_pixels, - stream, debug_synchronous - ) - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT * d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceHistogram::MultiHistogramRange( - d_temp_storage, temp_storage_bytes, - d_samples, - d_histogram, - num_levels, d_levels, - num_row_pixels, num_rows, row_stride_bytes, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_HISTOGRAM_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_radix_sort.hpp b/hipcub/include/hipcub/cub/device/device_radix_sort.hpp deleted file mode 100644 index 6ef0a2799..000000000 --- a/hipcub/include/hipcub/cub/device/device_radix_sort.hpp +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_RADIX_SORT_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_RADIX_SORT_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceRadixSort -{ - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortPairs( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - d_values_in, d_values_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortPairs( - d_temp_storage, temp_storage_bytes, - d_keys, d_values, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortPairsDescending( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - d_values_in, d_values_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortPairsDescending( - d_temp_storage, temp_storage_bytes, - d_keys, d_values, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortKeys( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortKeys( - d_temp_storage, temp_storage_bytes, - d_keys, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortKeysDescending( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRadixSort::SortKeysDescending( - d_temp_storage, temp_storage_bytes, - d_keys, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_RADIX_SORT_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_reduce.hpp b/hipcub/include/hipcub/cub/device/device_reduce.hpp deleted file mode 100644 index 816d141be..000000000 --- a/hipcub/include/hipcub/cub/device/device_reduce.hpp +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_REDUCE_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_REDUCE_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -class DeviceReduce -{ -public: - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ReduceOpT, - typename T - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Reduce(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - ReduceOpT reduction_op, - T init, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::Reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - reduction_op, init, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Sum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::Sum( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Min(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::Min( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMin(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::ArgMin( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Max(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::Max( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMax(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::ArgMax( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template< - typename KeysInputIteratorT, - typename UniqueOutputIteratorT, - typename ValuesInputIteratorT, - typename AggregatesOutputIteratorT, - typename NumRunsOutputIteratorT, - typename ReductionOpT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ReduceByKey(void * d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - UniqueOutputIteratorT d_unique_out, - ValuesInputIteratorT d_values_in, - AggregatesOutputIteratorT d_aggregates_out, - NumRunsOutputIteratorT d_num_runs_out, - ReductionOpT reduction_op, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceReduce::ReduceByKey( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_unique_out, - d_values_in, d_aggregates_out, - d_num_runs_out, reduction_op, num_items, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_run_length_encode.hpp b/hipcub/include/hipcub/cub/device/device_run_length_encode.hpp deleted file mode 100644 index 3f9ed314d..000000000 --- a/hipcub/include/hipcub/cub/device/device_run_length_encode.hpp +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -class DeviceRunLengthEncode -{ -public: - template< - typename InputIteratorT, - typename UniqueOutputIteratorT, - typename LengthsOutputIteratorT, - typename NumRunsOutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Encode(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - UniqueOutputIteratorT d_unique_out, - LengthsOutputIteratorT d_counts_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRunLengthEncode::Encode( - d_temp_storage, temp_storage_bytes, - d_in, - d_unique_out, d_counts_out, d_num_runs_out, - num_items, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OffsetsOutputIteratorT, - typename LengthsOutputIteratorT, - typename NumRunsOutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t NonTrivialRuns(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OffsetsOutputIteratorT d_offsets_out, - LengthsOutputIteratorT d_lengths_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceRunLengthEncode::NonTrivialRuns( - d_temp_storage, temp_storage_bytes, - d_in, - d_offsets_out, d_lengths_out, d_num_runs_out, - num_items, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_scan.hpp b/hipcub/include/hipcub/cub/device/device_scan.hpp deleted file mode 100644 index e4843d49c..000000000 --- a/hipcub/include/hipcub/cub/device/device_scan.hpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_SCAN_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_SCAN_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -class DeviceScan -{ -public: - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t InclusiveSum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceScan::InclusiveSum( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ScanOpT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t InclusiveScan(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceScan::InclusiveScan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, scan_op, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ExclusiveSum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceScan::ExclusiveSum( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ScanOpT, - typename InitValueT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ExclusiveScan(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - InitValueT init_value, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceScan::ExclusiveScan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, scan_op, init_value, num_items, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_SCAN_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_segmented_radix_sort.hpp b/hipcub/include/hipcub/cub/device/device_segmented_radix_sort.hpp deleted file mode 100644 index 03839b110..000000000 --- a/hipcub/include/hipcub/cub/device/device_segmented_radix_sort.hpp +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceSegmentedRadixSort -{ - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortPairs( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - d_values_in, d_values_out, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortPairs( - d_temp_storage, temp_storage_bytes, - d_keys, d_values, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortPairsDescending( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - d_values_in, d_values_out, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortPairsDescending( - d_temp_storage, temp_storage_bytes, - d_keys, d_values, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortKeys( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortKeys( - d_temp_storage, temp_storage_bytes, - d_keys, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortKeysDescending( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedRadixSort::SortKeysDescending( - d_temp_storage, temp_storage_bytes, - d_keys, - num_items, num_segments, - d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_segmented_reduce.hpp b/hipcub/include/hipcub/cub/device/device_segmented_reduce.hpp deleted file mode 100644 index 5266d08e7..000000000 --- a/hipcub/include/hipcub/cub/device/device_segmented_reduce.hpp +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceSegmentedReduce -{ - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT, - typename ReductionOp, - typename T - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Reduce(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - ReductionOp reduction_op, - T initial_value, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::Reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - reduction_op, initial_value, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Sum(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::Sum( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Min(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::Min( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMin(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::ArgMin( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Max(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::Max( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - stream, debug_synchronous - ) - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMax(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSegmentedReduce::ArgMax( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_segments, - d_begin_offsets, d_end_offsets, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/cub/device/device_select.hpp b/hipcub/include/hipcub/cub/device/device_select.hpp deleted file mode 100644 index 21cbd9bc9..000000000 --- a/hipcub/include/hipcub/cub/device/device_select.hpp +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_DEVICE_DEVICE_SELECT_HPP_ -#define HIPCUB_CUB_DEVICE_DEVICE_SELECT_HPP_ - -#include "../../config.hpp" - -#include - -BEGIN_HIPCUB_NAMESPACE - -class DeviceSelect -{ -public: - template < - typename InputIteratorT, - typename FlagIterator, - typename OutputIteratorT, - typename NumSelectedIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Flagged(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - FlagIterator d_flags, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSelect::Flagged( - d_temp_storage, temp_storage_bytes, - d_in, d_flags, - d_out, d_num_selected_out, num_items, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename NumSelectedIteratorT, - typename SelectOp - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t If(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - SelectOp select_op, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSelect::If( - d_temp_storage, temp_storage_bytes, - d_in, d_out, d_num_selected_out, - num_items, select_op, - stream, debug_synchronous - ) - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename NumSelectedIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Unique(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return hipCUDAErrorTohipError( - ::cub::DeviceSelect::Unique( - d_temp_storage, temp_storage_bytes, - d_in, d_out, d_num_selected_out, num_items, - stream, debug_synchronous - ) - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_CUB_DEVICE_DEVICE_SELECT_HPP_ diff --git a/hipcub/include/hipcub/cub/hipcub.hpp b/hipcub/include/hipcub/cub/hipcub.hpp deleted file mode 100644 index 73a777211..000000000 --- a/hipcub/include/hipcub/cub/hipcub.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_CUB_HIPCUB_HPP_ -#define HIPCUB_CUB_HIPCUB_HPP_ - -#include "../config.hpp" - -#define HIPCUB_WARP_THREADS CUB_PTX_WARP_THREADS -#define HIPCUB_ARCH CUB_PTX_ARCH - -BEGIN_HIPCUB_NAMESPACE - -using namespace cub; - -END_HIPCUB_NAMESPACE - -// Device functions must be wrapped so they return -// hipError_t instead of cudaError_t -#include "device/device_histogram.hpp" -#include "device/device_radix_sort.hpp" -#include "device/device_reduce.hpp" -#include "device/device_run_length_encode.hpp" -#include "device/device_segmented_radix_sort.hpp" -#include "device/device_segmented_reduce.hpp" -#include "device/device_scan.hpp" -#include "device/device_select.hpp" - -#endif // HIPCUB_CUB_HIPCUB_HPP_ diff --git a/hipcub/include/hipcub/hipcub.hpp b/hipcub/include/hipcub/hipcub.hpp deleted file mode 100644 index 36a3ad6d2..000000000 --- a/hipcub/include/hipcub/hipcub.hpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_HPP_ -#define HIPCUB_HPP_ - -/// \file -/// -/// Meta-header to include all hipCUB APIs. - -// Meta configuration for hipCUB -#include "config.hpp" -// Version -#include "hipcub_version.hpp" - -#ifdef __HIP_PLATFORM_HCC__ - #include "rocprim/hipcub.hpp" -#elif defined(__HIP_PLATFORM_NVCC__) - #include "cub/hipcub.hpp" -#endif - -#endif // HIPCUB_HPP_ diff --git a/hipcub/include/hipcub/hipcub_version.hpp.in b/hipcub/include/hipcub/hipcub_version.hpp.in deleted file mode 100644 index 9d93b4b17..000000000 --- a/hipcub/include/hipcub/hipcub_version.hpp.in +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_VERSION_HPP_ -#define HIPCUB_VERSION_HPP_ - -/// \def HIPCUB_VERSION -/// \brief HIPCUB library version -/// -/// Version number may not be visible in the documentation. -/// -/// HIPCUB_VERSION % 100 is the patch level, -/// HIPCUB_VERSION / 100 % 1000 is the minor version, -/// HIPCUB_VERSION / 100000 is the major version. -/// -/// For example, if HIPCUB_VERSION is 100500, then the major version is 1, -/// the minor version is 5, and the patch level is 0. -#define HIPCUB_VERSION @rocprim_VERSION_MAJOR@ * 100000 + @rocprim_VERSION_MINOR@ * 100 + @rocprim_VERSION_PATCH@ - -#define HIPCUB_VERSION_MAJOR @rocprim_VERSION_MAJOR@ -#define HIPCUB_VERSION_MINOR @rocprim_VERSION_MINOR@ -#define HIPCUB_VERSION_PATCH @rocprim_VERSION_PATCH@ - -#endif // HIPCUB_VERSION_HPP_ \ No newline at end of file diff --git a/hipcub/include/hipcub/rocprim/block/block_discontinuity.hpp b/hipcub/include/hipcub/rocprim/block/block_discontinuity.hpp deleted file mode 100644 index bb114d7a6..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_discontinuity.hpp +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - int BLOCK_DIM_X, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockDiscontinuity - : private ::rocprim::block_discontinuity< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_discontinuity< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockDiscontinuity() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockDiscontinuity(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - template - HIPCUB_DEVICE inline - void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_heads(head_flags, input, flag_op, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op, - T tile_predecessor_item) - { - base_type::flag_heads(head_flags, tile_predecessor_item, input, flag_op, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_tails(tail_flags, input, flag_op, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op, - T tile_successor_item) - { - base_type::flag_tails(tail_flags, tile_successor_item, input, flag_op, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], - FlagT (&tail_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_heads_and_tails( - head_flags, tail_flags, input, - flag_op, temp_storage_ - ); - } - - template - HIPCUB_DEVICE inline - void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], - FlagT (&tail_flags)[ITEMS_PER_THREAD], - T tile_successor_item, - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_heads_and_tails( - head_flags, tail_flags, tile_successor_item, input, - flag_op, temp_storage_ - ); - } - - template - HIPCUB_DEVICE inline - void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], - T tile_predecessor_item, - FlagT (&tail_flags)[ITEMS_PER_THREAD], - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_heads_and_tails( - head_flags, tile_predecessor_item, tail_flags, input, - flag_op, temp_storage_ - ); - } - - template - HIPCUB_DEVICE inline - void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], - T tile_predecessor_item, - FlagT (&tail_flags)[ITEMS_PER_THREAD], - T tile_successor_item, - T (&input)[ITEMS_PER_THREAD], - FlagOp flag_op) - { - base_type::flag_heads_and_tails( - head_flags, tile_predecessor_item, tail_flags, tile_successor_item, input, - flag_op, temp_storage_ - ); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_exchange.hpp b/hipcub/include/hipcub/rocprim/block/block_exchange.hpp deleted file mode 100644 index 465fb50d1..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_exchange.hpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename InputT, - int BLOCK_DIM_X, - int ITEMS_PER_THREAD, - bool WARP_TIME_SLICING = false, /* ignored */ - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockExchange - : private ::rocprim::block_exchange< - InputT, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_exchange< - InputT, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockExchange() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockExchange(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - template - HIPCUB_DEVICE inline - void StripedToBlocked(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD]) - { - base_type::striped_to_blocked(input_items, output_items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void BlockedToStriped(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD]) - { - base_type::blocked_to_striped(input_items, output_items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void WarpStripedToBlocked(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD]) - { - base_type::warp_striped_to_blocked(input_items, output_items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void BlockedToWarpStriped(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD]) - { - base_type::blocked_to_warp_striped(input_items, output_items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ScatterToBlocked(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD], - OffsetT (&ranks)[ITEMS_PER_THREAD]) - { - base_type::scatter_to_blocked(input_items, output_items, ranks, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ScatterToStriped(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD], - OffsetT (&ranks)[ITEMS_PER_THREAD]) - { - base_type::scatter_to_striped(input_items, output_items, ranks, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ScatterToStripedGuarded(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD], - OffsetT (&ranks)[ITEMS_PER_THREAD]) - { - base_type::scatter_to_striped_guarded(input_items, output_items, ranks, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ScatterToStripedFlagged(InputT (&input_items)[ITEMS_PER_THREAD], - OutputT (&output_items)[ITEMS_PER_THREAD], - OffsetT (&ranks)[ITEMS_PER_THREAD], - ValidFlag (&is_valid)[ITEMS_PER_THREAD]) - { - base_type::scatter_to_striped_flagged(input_items, output_items, ranks, is_valid, temp_storage_); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_EXCHANGE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_histogram.hpp b/hipcub/include/hipcub/rocprim/block/block_histogram.hpp deleted file mode 100644 index 2777fa3e3..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_histogram.hpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ - -#include - -#include "../../config.hpp" - -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -namespace detail -{ - inline constexpr - typename std::underlying_type<::rocprim::block_histogram_algorithm>::type - to_BlockHistogramAlgorithm_enum(::rocprim::block_histogram_algorithm v) - { - using utype = std::underlying_type<::rocprim::block_histogram_algorithm>::type; - return static_cast(v); - } -} - -enum BlockHistogramAlgorithm -{ - BLOCK_HISTO_ATOMIC - = detail::to_BlockHistogramAlgorithm_enum(::rocprim::block_histogram_algorithm::using_atomic), - BLOCK_HISTO_SORT - = detail::to_BlockHistogramAlgorithm_enum(::rocprim::block_histogram_algorithm::using_sort) -}; - -template< - typename T, - int BLOCK_DIM_X, - int ITEMS_PER_THREAD, - int BINS, - BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockHistogram - : private ::rocprim::block_histogram< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - BINS, - static_cast<::rocprim::block_histogram_algorithm>(ALGORITHM) - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_histogram< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - BINS, - static_cast<::rocprim::block_histogram_algorithm>(ALGORITHM) - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockHistogram() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockHistogram(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - template - HIPCUB_DEVICE inline - void InitHistogram(CounterT histogram[BINS]) - { - base_type::init_histogram(histogram); - } - - template - HIPCUB_DEVICE inline - void Composite(T (&items)[ITEMS_PER_THREAD], - CounterT histogram[BINS]) - { - base_type::composite(items, histogram, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void Histogram(T (&items)[ITEMS_PER_THREAD], - CounterT histogram[BINS]) - { - base_type::init_histogram(histogram); - CTA_SYNC(); - base_type::composite(items, histogram, temp_storage_); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_load.hpp b/hipcub/include/hipcub/rocprim/block/block_load.hpp deleted file mode 100644 index fefec516d..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_load.hpp +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ - -#include - -#include "../../config.hpp" - -#include "block_load_func.hpp" - -BEGIN_HIPCUB_NAMESPACE - -namespace detail -{ - inline constexpr - typename std::underlying_type<::rocprim::block_load_method>::type - to_BlockLoadAlgorithm_enum(::rocprim::block_load_method v) - { - using utype = std::underlying_type<::rocprim::block_load_method>::type; - return static_cast(v); - } -} - -enum BlockLoadAlgorithm -{ - BLOCK_LOAD_DIRECT - = detail::to_BlockLoadAlgorithm_enum(::rocprim::block_load_method::block_load_direct), - BLOCK_LOAD_VECTORIZE - = detail::to_BlockLoadAlgorithm_enum(::rocprim::block_load_method::block_load_vectorize), - BLOCK_LOAD_TRANSPOSE - = detail::to_BlockLoadAlgorithm_enum(::rocprim::block_load_method::block_load_transpose), - BLOCK_LOAD_WARP_TRANSPOSE - = detail::to_BlockLoadAlgorithm_enum(::rocprim::block_load_method::block_load_warp_transpose), - BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED - = detail::to_BlockLoadAlgorithm_enum(::rocprim::block_load_method::block_load_warp_transpose) -}; - -template< - typename T, - int BLOCK_DIM_X, - int ITEMS_PER_THREAD, - BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockLoad - : private ::rocprim::block_load< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - static_cast<::rocprim::block_load_method>(ALGORITHM) - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_load< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - static_cast<::rocprim::block_load_method>(ALGORITHM) - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockLoad() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockLoad(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - template - HIPCUB_DEVICE inline - void Load(InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) - { - base_type::load(block_iter, items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void Load(InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) - { - base_type::load(block_iter, items, valid_items, temp_storage_); - } - - template< - class InputIteratorT, - class Default - > - HIPCUB_DEVICE inline - void Load(InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items, - Default oob_default) - { - base_type::load(block_iter, items, valid_items, oob_default, temp_storage_); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_load_func.hpp b/hipcub/include/hipcub/rocprim/block/block_load_func.hpp deleted file mode 100644 index 6590b0320..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_load_func.hpp +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_FUNC_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_FUNC_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectBlocked(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_load_direct_blocked( - linear_id, block_iter, items - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectBlocked(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_load_direct_blocked( - linear_id, block_iter, items, valid_items - ); -} - -template< - typename T, - typename Default, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectBlocked(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items, - Default oob_default) -{ - ::rocprim::block_load_direct_blocked( - linear_id, block_iter, items, valid_items, oob_default - ); -} - -template < - typename T, - int ITEMS_PER_THREAD -> -HIPCUB_DEVICE inline -void LoadDirectBlockedVectorized(int linear_id, - T* block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_load_direct_blocked_vectorized( - linear_id, block_iter, items - ); -} - -template< - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_load_direct_striped( - linear_id, block_iter, items - ); -} - -template< - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_load_direct_striped( - linear_id, block_iter, items, valid_items - ); -} - -template< - int BLOCK_THREADS, - typename T, - typename Default, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items, - Default oob_default) -{ - ::rocprim::block_load_direct_striped( - linear_id, block_iter, items, valid_items, oob_default - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectWarpStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_load_direct_warp_striped( - linear_id, block_iter, items - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectWarpStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_load_direct_warp_striped( - linear_id, block_iter, items, valid_items - ); -} - -template< - typename T, - typename Default, - int ITEMS_PER_THREAD, - typename InputIteratorT -> -HIPCUB_DEVICE inline -void LoadDirectWarpStriped(int linear_id, - InputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items, - Default oob_default) -{ - ::rocprim::block_load_direct_warp_striped( - linear_id, block_iter, items, valid_items, oob_default - ); -} - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_FUNC_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_radix_sort.hpp b/hipcub/include/hipcub/rocprim/block/block_radix_sort.hpp deleted file mode 100644 index 6f7b87654..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_radix_sort.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_RADIX_SORT_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_RADIX_SORT_HPP_ - -#include "../../config.hpp" - -#include "../util_type.hpp" - -#include "block_scan.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename KeyT, - int BLOCK_DIM_X, - int ITEMS_PER_THREAD, - typename ValueT = NullType, - int RADIX_BITS = 4, /* ignored */ - bool MEMOIZE_OUTER_SCAN = true, /* ignored */ - BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, /* ignored */ - hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, /* ignored */ - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int PTX_ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockRadixSort - : private ::rocprim::block_radix_sort< - KeyT, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - ValueT - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_radix_sort< - KeyT, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - ValueT - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockRadixSort() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockRadixSort(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - HIPCUB_DEVICE inline - void Sort(KeyT (&keys)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort(keys, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void Sort(KeyT (&keys)[ITEMS_PER_THREAD], - ValueT (&values)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort(keys, values, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortDescending(KeyT (&keys)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_desc(keys, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortDescending(KeyT (&keys)[ITEMS_PER_THREAD], - ValueT (&values)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_desc(keys, values, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortBlockedToStriped(KeyT (&keys)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_to_striped(keys, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortBlockedToStriped(KeyT (&keys)[ITEMS_PER_THREAD], - ValueT (&values)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_to_striped(keys, values, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortDescendingBlockedToStriped(KeyT (&keys)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_desc_to_striped(keys, temp_storage_, begin_bit, end_bit); - } - - HIPCUB_DEVICE inline - void SortDescendingBlockedToStriped(KeyT (&keys)[ITEMS_PER_THREAD], - ValueT (&values)[ITEMS_PER_THREAD], - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8) - { - base_type::sort_desc_to_striped(keys, values, temp_storage_, begin_bit, end_bit); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_RADIX_SORT_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_reduce.hpp b/hipcub/include/hipcub/rocprim/block/block_reduce.hpp deleted file mode 100644 index 311698668..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_reduce.hpp +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ - -#include - -#include "../../config.hpp" - -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -namespace detail -{ - inline constexpr - typename std::underlying_type<::rocprim::block_reduce_algorithm>::type - to_BlockReduceAlgorithm_enum(::rocprim::block_reduce_algorithm v) - { - using utype = std::underlying_type<::rocprim::block_reduce_algorithm>::type; - return static_cast(v); - } -} - -enum BlockReduceAlgorithm -{ - BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY - = detail::to_BlockReduceAlgorithm_enum(::rocprim::block_reduce_algorithm::raking_reduce), - BLOCK_REDUCE_RAKING - = detail::to_BlockReduceAlgorithm_enum(::rocprim::block_reduce_algorithm::raking_reduce), - BLOCK_REDUCE_WARP_REDUCTIONS - = detail::to_BlockReduceAlgorithm_enum(::rocprim::block_reduce_algorithm::using_warp_reduce) -}; - -template< - typename T, - int BLOCK_DIM_X, - BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockReduce - : private ::rocprim::block_reduce< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - static_cast<::rocprim::block_reduce_algorithm>(ALGORITHM) - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_reduce< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - static_cast<::rocprim::block_reduce_algorithm>(ALGORITHM) - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockReduce() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockReduce(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - HIPCUB_DEVICE inline - T Sum(T input) - { - base_type::reduce(input, input, temp_storage_); - return input; - } - - HIPCUB_DEVICE inline - T Sum(T input, int valid_items) - { - base_type::reduce(input, input, valid_items, temp_storage_); - return input; - } - - template - HIPCUB_DEVICE inline - T Sum(T(&input)[ITEMS_PER_THREAD]) - { - T output; - base_type::reduce(input, output, temp_storage_); - return output; - } - - template - HIPCUB_DEVICE inline - T Reduce(T input, ReduceOp reduce_op) - { - base_type::reduce(input, input, temp_storage_, reduce_op); - return input; - } - - template - HIPCUB_DEVICE inline - T Reduce(T input, ReduceOp reduce_op, int valid_items) - { - base_type::reduce(input, input, valid_items, temp_storage_, reduce_op); - return input; - } - - template - HIPCUB_DEVICE inline - T Reduce(T(&input)[ITEMS_PER_THREAD], ReduceOp reduce_op) - { - T output; - base_type::reduce(input, output, temp_storage_, reduce_op); - return output; - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_scan.hpp b/hipcub/include/hipcub/rocprim/block/block_scan.hpp deleted file mode 100644 index 3555308f5..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_scan.hpp +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ - -#include - -#include "../../config.hpp" - -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -namespace detail -{ - inline constexpr - typename std::underlying_type<::rocprim::block_scan_algorithm>::type - to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm v) - { - using utype = std::underlying_type<::rocprim::block_scan_algorithm>::type; - return static_cast(v); - } -} - -enum BlockScanAlgorithm -{ - BLOCK_SCAN_RAKING - = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::reduce_then_scan), - BLOCK_SCAN_RAKING_MEMOIZE - = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::reduce_then_scan), - BLOCK_SCAN_WARP_SCANS - = detail::to_BlockScanAlgorithm_enum(::rocprim::block_scan_algorithm::using_warp_scan) -}; - -template< - typename T, - int BLOCK_DIM_X, - BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockScan - : private ::rocprim::block_scan< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - static_cast<::rocprim::block_scan_algorithm>(ALGORITHM) - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_scan< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - static_cast<::rocprim::block_scan_algorithm>(ALGORITHM) - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockScan() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockScan(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - HIPCUB_DEVICE inline - void InclusiveSum(T input, T& output) - { - base_type::inclusive_scan(input, output, temp_storage_); - } - - HIPCUB_DEVICE inline - void InclusiveSum(T input, T& output, T& block_aggregate) - { - base_type::inclusive_scan(input, output, block_aggregate, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void InclusiveSum(T input, T& output, BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::inclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum() - ); - } - - template - HIPCUB_DEVICE inline - void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD]) - { - base_type::inclusive_scan(input, output, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - T& block_aggregate) - { - base_type::inclusive_scan(input, output, block_aggregate, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void InclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::inclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum() - ); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T input, T& output, ScanOp scan_op) - { - base_type::inclusive_scan(input, output, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T input, T& output, ScanOp scan_op, T& block_aggregate) - { - base_type::inclusive_scan(input, output, block_aggregate, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T input, T& output, ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::inclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op) - { - base_type::inclusive_scan(input, output, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - ScanOp scan_op, T& block_aggregate) - { - base_type::inclusive_scan(input, output, block_aggregate, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::inclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, scan_op - ); - } - - HIPCUB_DEVICE inline - void ExclusiveSum(T input, T& output) - { - base_type::exclusive_scan(input, output, T(0), temp_storage_); - } - - HIPCUB_DEVICE inline - void ExclusiveSum(T input, T& output, T& block_aggregate) - { - base_type::exclusive_scan(input, output, T(0), block_aggregate, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ExclusiveSum(T input, T& output, BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::exclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum() - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD]) - { - base_type::exclusive_scan(input, output, T(0), temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - T& block_aggregate) - { - base_type::exclusive_scan(input, output, T(0), block_aggregate, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ExclusiveSum(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::exclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, ::hipcub::Sum() - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& output, T initial_value, ScanOp scan_op) - { - base_type::exclusive_scan(input, output, initial_value, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& output, T initial_value, - ScanOp scan_op, T& block_aggregate) - { - base_type::exclusive_scan( - input, output, initial_value, block_aggregate, temp_storage_, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& output, ScanOp scan_op, - BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::exclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - T initial_value, ScanOp scan_op) - { - base_type::exclusive_scan(input, output, initial_value, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - T initial_value, ScanOp scan_op, T& block_aggregate) - { - base_type::exclusive_scan( - input, output, initial_value, block_aggregate, temp_storage_, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], - ScanOp scan_op, BlockPrefixCallbackOp& block_prefix_callback_op) - { - base_type::exclusive_scan( - input, output, temp_storage_, block_prefix_callback_op, scan_op - ); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_store.hpp b/hipcub/include/hipcub/rocprim/block/block_store.hpp deleted file mode 100644 index 5bb15f8fd..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_store.hpp +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_HPP_ - -#include - -#include "../../config.hpp" - -#include "block_store_func.hpp" - -BEGIN_HIPCUB_NAMESPACE - -namespace detail -{ - inline constexpr - typename std::underlying_type<::rocprim::block_store_method>::type - to_BlockStoreAlgorithm_enum(::rocprim::block_store_method v) - { - using utype = std::underlying_type<::rocprim::block_store_method>::type; - return static_cast(v); - } -} - -enum BlockStoreAlgorithm -{ - BLOCK_STORE_DIRECT - = detail::to_BlockStoreAlgorithm_enum(::rocprim::block_store_method::block_store_direct), - BLOCK_STORE_VECTORIZE - = detail::to_BlockStoreAlgorithm_enum(::rocprim::block_store_method::block_store_vectorize), - BLOCK_STORE_TRANSPOSE - = detail::to_BlockStoreAlgorithm_enum(::rocprim::block_store_method::block_store_transpose), - BLOCK_STORE_WARP_TRANSPOSE - = detail::to_BlockStoreAlgorithm_enum(::rocprim::block_store_method::block_store_warp_transpose), - BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED - = detail::to_BlockStoreAlgorithm_enum(::rocprim::block_store_method::block_store_warp_transpose) -}; - -template< - typename T, - int BLOCK_DIM_X, - int ITEMS_PER_THREAD, - BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT, - int BLOCK_DIM_Y = 1, - int BLOCK_DIM_Z = 1, - int ARCH = HIPCUB_ARCH /* ignored */ -> -class BlockStore - : private ::rocprim::block_store< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - static_cast<::rocprim::block_store_method>(ALGORITHM) - > -{ - static_assert( - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z > 0, - "BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z must be greater than 0" - ); - - using base_type = - typename ::rocprim::block_store< - T, - BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, - ITEMS_PER_THREAD, - static_cast<::rocprim::block_store_method>(ALGORITHM) - >; - - // Reference to temporary storage (usually shared memory) - typename base_type::storage_type& temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - BlockStore() : temp_storage_(private_storage()) - { - } - - HIPCUB_DEVICE inline - BlockStore(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - template - HIPCUB_DEVICE inline - void Store(OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) - { - base_type::store(block_iter, items, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void Store(OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) - { - base_type::store(block_iter, items, valid_items, temp_storage_); - } - -private: - HIPCUB_DEVICE inline - TempStorage& private_storage() - { - HIPCUB_SHARED_MEMORY TempStorage private_storage; - return private_storage; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/block/block_store_func.hpp b/hipcub/include/hipcub/rocprim/block/block_store_func.hpp deleted file mode 100644 index 48f9427f7..000000000 --- a/hipcub/include/hipcub/rocprim/block/block_store_func.hpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_FUNC_HPP_ -#define HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_FUNC_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectBlocked(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_store_direct_blocked( - linear_id, block_iter, items - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectBlocked(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_store_direct_blocked( - linear_id, block_iter, items, valid_items - ); -} - -template < - typename T, - int ITEMS_PER_THREAD -> -HIPCUB_DEVICE inline -void StoreDirectBlockedVectorized(int linear_id, - T* block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_store_direct_blocked_vectorized( - linear_id, block_iter, items - ); -} - -template< - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectStriped(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_store_direct_striped( - linear_id, block_iter, items - ); -} - -template< - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectStriped(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_store_direct_striped( - linear_id, block_iter, items, valid_items - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectWarpStriped(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD]) -{ - ::rocprim::block_store_direct_warp_striped( - linear_id, block_iter, items - ); -} - -template< - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorT -> -HIPCUB_DEVICE inline -void StoreDirectWarpStriped(int linear_id, - OutputIteratorT block_iter, - T (&items)[ITEMS_PER_THREAD], - int valid_items) -{ - ::rocprim::block_store_direct_warp_striped( - linear_id, block_iter, items, valid_items - ); -} - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_FUNC_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_histogram.hpp b/hipcub/include/hipcub/rocprim/device/device_histogram.hpp deleted file mode 100644 index 8edef4252..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_histogram.hpp +++ /dev/null @@ -1,283 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_HISTOGRAM_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_HISTOGRAM_HPP_ - -#include "../../config.hpp" - -#include "../util_type.hpp" - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceHistogram -{ - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_samples, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::histogram_even( - d_temp_storage, temp_storage_bytes, - d_samples, num_samples, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::histogram_even( - d_temp_storage, temp_storage_bytes, - d_samples, num_row_samples, num_rows, row_stride_bytes, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT lower_level[NUM_ACTIVE_CHANNELS], - LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - unsigned int levels[NUM_ACTIVE_CHANNELS]; - for(unsigned int channel = 0; channel < NUM_ACTIVE_CHANNELS; channel++) - { - levels[channel] = num_levels[channel]; - } - return ::rocprim::multi_histogram_even( - d_temp_storage, temp_storage_bytes, - d_samples, num_pixels, - d_histogram, - levels, lower_level, upper_level, - stream, debug_synchronous - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramEven(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT lower_level[NUM_ACTIVE_CHANNELS], - LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - unsigned int levels[NUM_ACTIVE_CHANNELS]; - for(unsigned int channel = 0; channel < NUM_ACTIVE_CHANNELS; channel++) - { - levels[channel] = num_levels[channel]; - } - return ::rocprim::multi_histogram_even( - d_temp_storage, temp_storage_bytes, - d_samples, num_row_pixels, num_rows, row_stride_bytes, - d_histogram, - levels, lower_level, upper_level, - stream, debug_synchronous - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT * d_levels, - OffsetT num_samples, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::histogram_range( - d_temp_storage, temp_storage_bytes, - d_samples, num_samples, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ); - } - - template< - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t HistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram, - int num_levels, - LevelT * d_levels, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::histogram_range( - d_temp_storage, temp_storage_bytes, - d_samples, num_row_samples, num_rows, row_stride_bytes, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT * d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - unsigned int levels[NUM_ACTIVE_CHANNELS]; - for(unsigned int channel = 0; channel < NUM_ACTIVE_CHANNELS; channel++) - { - levels[channel] = num_levels[channel]; - } - return ::rocprim::multi_histogram_range( - d_temp_storage, temp_storage_bytes, - d_samples, num_pixels, - d_histogram, - levels, d_levels, - stream, debug_synchronous - ); - } - - template< - int NUM_CHANNELS, - int NUM_ACTIVE_CHANNELS, - typename SampleIteratorT, - typename CounterT, - typename LevelT, - typename OffsetT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t MultiHistogramRange(void * d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT * d_histogram[NUM_ACTIVE_CHANNELS], - int num_levels[NUM_ACTIVE_CHANNELS], - LevelT * d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - unsigned int levels[NUM_ACTIVE_CHANNELS]; - for(unsigned int channel = 0; channel < NUM_ACTIVE_CHANNELS; channel++) - { - levels[channel] = num_levels[channel]; - } - return ::rocprim::multi_histogram_range( - d_temp_storage, temp_storage_bytes, - d_samples, num_row_pixels, num_rows, row_stride_bytes, - d_histogram, - levels, d_levels, - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_HISTOGRAM_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_radix_sort.hpp b/hipcub/include/hipcub/rocprim/device/device_radix_sort.hpp deleted file mode 100644 index 8c15256a5..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_radix_sort.hpp +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_RADIX_SORT_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_RADIX_SORT_HPP_ - -#include "../../config.hpp" - -#include "../util_type.hpp" - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceRadixSort -{ - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::radix_sort_pairs( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, d_values_in, d_values_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - ::rocprim::double_buffer d_values_db = detail::to_double_buffer(d_values); - hipError_t error = ::rocprim::radix_sort_pairs( - d_temp_storage, temp_storage_bytes, - d_keys_db, d_values_db, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - detail::update_double_buffer(d_values, d_values_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::radix_sort_pairs_desc( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, d_values_in, d_values_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - ::rocprim::double_buffer d_values_db = detail::to_double_buffer(d_values); - hipError_t error = ::rocprim::radix_sort_pairs_desc( - d_temp_storage, temp_storage_bytes, - d_keys_db, d_values_db, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - detail::update_double_buffer(d_values, d_values_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::radix_sort_keys( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - hipError_t error = ::rocprim::radix_sort_keys( - d_temp_storage, temp_storage_bytes, - d_keys_db, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::radix_sort_keys_desc( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - hipError_t error = ::rocprim::radix_sort_keys_desc( - d_temp_storage, temp_storage_bytes, - d_keys_db, num_items, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - return error; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_RADIX_SORT_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_reduce.hpp b/hipcub/include/hipcub/rocprim/device/device_reduce.hpp deleted file mode 100644 index 7be599cb9..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_reduce.hpp +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_ - -#include -#include - -#include // __half - -#include "../../config.hpp" -#include "../iterator/arg_index_input_iterator.hpp" -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE -namespace detail -{ - -template -inline -T get_lowest_value() -{ - return std::numeric_limits::lowest(); -} - -template<> -inline -__half get_lowest_value<__half>() -{ - unsigned short lowest_half = 0xfbff; - __half lowest_value = *reinterpret_cast<__half*>(&lowest_half); - return lowest_value; -} - -template -inline -T get_max_value() -{ - return std::numeric_limits::max(); -} - -template<> -inline -__half get_max_value<__half>() -{ - unsigned short max_half = 0x7bff; - __half max_value = *reinterpret_cast<__half*>(&max_half); - return max_value; -} - -} // end detail namespace - -class DeviceReduce -{ -public: - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ReduceOpT, - typename T - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Reduce(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - ReduceOpT reduction_op, - T init, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, init, num_items, reduction_op, - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Sum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using T = typename std::iterator_traits::value_type; - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, T(0), num_items, ::hipcub::Sum(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Min(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using T = typename std::iterator_traits::value_type; - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, detail::get_max_value(), num_items, ::hipcub::Min(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMin(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using OffsetT = int; - using T = typename std::iterator_traits::value_type; - using O = typename std::iterator_traits::value_type; - using OutputTupleT = - typename std::conditional< - std::is_same::value, - KeyValuePair, - O - >::type; - - using OutputValueT = typename OutputTupleT::Value; - using IteratorT = ArgIndexInputIterator; - - IteratorT d_indexed_in(d_in); - OutputTupleT init(1, detail::get_max_value()); - - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_indexed_in, d_out, init, num_items, ::hipcub::ArgMin(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Max(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using T = typename std::iterator_traits::value_type; - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, detail::get_lowest_value(), num_items, ::hipcub::Max(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMax(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using OffsetT = int; - using T = typename std::iterator_traits::value_type; - using O = typename std::iterator_traits::value_type; - using OutputTupleT = - typename std::conditional< - std::is_same::value, - KeyValuePair, - O - >::type; - - using OutputValueT = typename OutputTupleT::Value; - using IteratorT = ArgIndexInputIterator; - - IteratorT d_indexed_in(d_in); - OutputTupleT init(1, detail::get_lowest_value()); - - return ::rocprim::reduce( - d_temp_storage, temp_storage_bytes, - d_indexed_in, d_out, init, num_items, ::hipcub::ArgMax(), - stream, debug_synchronous - ); - } - - template< - typename KeysInputIteratorT, - typename UniqueOutputIteratorT, - typename ValuesInputIteratorT, - typename AggregatesOutputIteratorT, - typename NumRunsOutputIteratorT, - typename ReductionOpT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ReduceByKey(void * d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - UniqueOutputIteratorT d_unique_out, - ValuesInputIteratorT d_values_in, - AggregatesOutputIteratorT d_aggregates_out, - NumRunsOutputIteratorT d_num_runs_out, - ReductionOpT reduction_op, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using key_compare_op = - ::rocprim::equal_to::value_type>; - return ::rocprim::reduce_by_key( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_values_in, num_items, - d_unique_out, d_aggregates_out, d_num_runs_out, - reduction_op, key_compare_op(), - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_run_length_encode.hpp b/hipcub/include/hipcub/rocprim/device/device_run_length_encode.hpp deleted file mode 100644 index 6fc7fb356..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_run_length_encode.hpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -class DeviceRunLengthEncode -{ -public: - template< - typename InputIteratorT, - typename UniqueOutputIteratorT, - typename LengthsOutputIteratorT, - typename NumRunsOutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Encode(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - UniqueOutputIteratorT d_unique_out, - LengthsOutputIteratorT d_counts_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::run_length_encode( - d_temp_storage, temp_storage_bytes, - d_in, num_items, - d_unique_out, d_counts_out, d_num_runs_out, - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OffsetsOutputIteratorT, - typename LengthsOutputIteratorT, - typename NumRunsOutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t NonTrivialRuns(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OffsetsOutputIteratorT d_offsets_out, - LengthsOutputIteratorT d_lengths_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::run_length_encode_non_trivial_runs( - d_temp_storage, temp_storage_bytes, - d_in, num_items, - d_offsets_out, d_lengths_out, d_num_runs_out, - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_RUN_LENGTH_ENCODE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_scan.hpp b/hipcub/include/hipcub/rocprim/device/device_scan.hpp deleted file mode 100644 index 3f275df74..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_scan.hpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SCAN_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_SCAN_HPP_ - -#include "../../config.hpp" - -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -class DeviceScan -{ -public: - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t InclusiveSum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::inclusive_scan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, ::hipcub::Sum(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ScanOpT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t InclusiveScan(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::inclusive_scan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, num_items, scan_op, - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ExclusiveSum(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using T = typename std::iterator_traits::value_type; - return ::rocprim::exclusive_scan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, T(0), num_items, ::hipcub::Sum(), - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename ScanOpT, - typename InitValueT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ExclusiveScan(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - InitValueT init_value, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::exclusive_scan( - d_temp_storage, temp_storage_bytes, - d_in, d_out, init_value, num_items, scan_op, - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_SCAN_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_segmented_radix_sort.hpp b/hipcub/include/hipcub/rocprim/device/device_segmented_radix_sort.hpp deleted file mode 100644 index 54c912563..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_segmented_radix_sort.hpp +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ - -#include "../../config.hpp" - -#include "../util_type.hpp" - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceSegmentedRadixSort -{ - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::segmented_radix_sort_pairs( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, d_values_in, d_values_out, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairs(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - ::rocprim::double_buffer d_values_db = detail::to_double_buffer(d_values); - hipError_t error = ::rocprim::segmented_radix_sort_pairs( - d_temp_storage, temp_storage_bytes, - d_keys_db, d_values_db, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - detail::update_double_buffer(d_values, d_values_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - const ValueT * d_values_in, - ValueT * d_values_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::segmented_radix_sort_pairs_desc( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, d_values_in, d_values_out, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortPairsDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - ::rocprim::double_buffer d_values_db = detail::to_double_buffer(d_values); - hipError_t error = ::rocprim::segmented_radix_sort_pairs_desc( - d_temp_storage, temp_storage_bytes, - d_keys_db, d_values_db, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - detail::update_double_buffer(d_values, d_values_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::segmented_radix_sort_keys( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeys(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - hipError_t error = ::rocprim::segmented_radix_sort_keys( - d_temp_storage, temp_storage_bytes, - d_keys_db, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - return error; - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - const KeyT * d_keys_in, - KeyT * d_keys_out, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::segmented_radix_sort_keys_desc( - d_temp_storage, temp_storage_bytes, - d_keys_in, d_keys_out, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - } - - template - HIPCUB_RUNTIME_FUNCTION static - hipError_t SortKeysDescending(void * d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - int begin_bit = 0, - int end_bit = sizeof(KeyT) * 8, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - ::rocprim::double_buffer d_keys_db = detail::to_double_buffer(d_keys); - hipError_t error = ::rocprim::segmented_radix_sort_keys_desc( - d_temp_storage, temp_storage_bytes, - d_keys_db, num_items, - num_segments, d_begin_offsets, d_end_offsets, - begin_bit, end_bit, - stream, debug_synchronous - ); - detail::update_double_buffer(d_keys, d_keys_db); - return error; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_segmented_reduce.hpp b/hipcub/include/hipcub/rocprim/device/device_segmented_reduce.hpp deleted file mode 100644 index bf5b96517..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_segmented_reduce.hpp +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ - -#include -#include - -#include "../../config.hpp" - -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -struct DeviceSegmentedReduce -{ - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT, - typename ReductionOp, - typename T - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Reduce(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - ReductionOp reduction_op, - T initial_value, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - reduction_op, initial_value, - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Sum(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using input_type = typename std::iterator_traits::value_type; - - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - ::hipcub::Sum(), input_type(), - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Min(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using input_type = typename std::iterator_traits::value_type; - - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - ::hipcub::Min(), std::numeric_limits::max(), - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMin(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using OffsetT = int; - using T = typename std::iterator_traits::value_type; - using O = typename std::iterator_traits::value_type; - using OutputTupleT = typename std::conditional< - std::is_same::value, - KeyValuePair, - O - >::type; - - using OutputValueT = typename OutputTupleT::Value; - using IteratorT = ArgIndexInputIterator; - - IteratorT d_indexed_in(d_in); - const OutputTupleT init(1, std::numeric_limits::max()); - - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_indexed_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - ::hipcub::ArgMin(), init, - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Max(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using input_type = typename std::iterator_traits::value_type; - - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - ::hipcub::Max(), std::numeric_limits::lowest(), - stream, debug_synchronous - ); - } - - template< - typename InputIteratorT, - typename OutputIteratorT, - typename OffsetIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t ArgMax(void * d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - OffsetIteratorT d_begin_offsets, - OffsetIteratorT d_end_offsets, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - using OffsetT = int; - using T = typename std::iterator_traits::value_type; - using O = typename std::iterator_traits::value_type; - using OutputTupleT = typename std::conditional< - std::is_same::value, - KeyValuePair, - O - >::type; - - using OutputValueT = typename OutputTupleT::Value; - using IteratorT = ArgIndexInputIterator; - - IteratorT d_indexed_in(d_in); - const OutputTupleT init(1, std::numeric_limits::lowest()); - - return ::rocprim::segmented_reduce( - d_temp_storage, temp_storage_bytes, - d_indexed_in, d_out, - num_segments, d_begin_offsets, d_end_offsets, - ::hipcub::ArgMax(), init, - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/device/device_select.hpp b/hipcub/include/hipcub/rocprim/device/device_select.hpp deleted file mode 100644 index a9de9134e..000000000 --- a/hipcub/include/hipcub/rocprim/device/device_select.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SELECT_HPP_ -#define HIPCUB_ROCPRIM_DEVICE_DEVICE_SELECT_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -class DeviceSelect -{ -public: - template < - typename InputIteratorT, - typename FlagIterator, - typename OutputIteratorT, - typename NumSelectedIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Flagged(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - FlagIterator d_flags, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::select( - d_temp_storage, temp_storage_bytes, - d_in, d_flags, d_out, d_num_selected_out, num_items, - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename NumSelectedIteratorT, - typename SelectOp - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t If(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - SelectOp select_op, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::select( - d_temp_storage, temp_storage_bytes, - d_in, d_out, d_num_selected_out, num_items, select_op, - stream, debug_synchronous - ); - } - - template < - typename InputIteratorT, - typename OutputIteratorT, - typename NumSelectedIteratorT - > - HIPCUB_RUNTIME_FUNCTION static - hipError_t Unique(void *d_temp_storage, - size_t &temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - hipStream_t stream = 0, - bool debug_synchronous = false) - { - return ::rocprim::unique( - d_temp_storage, temp_storage_bytes, - d_in, d_out, d_num_selected_out, num_items, hipcub::Equality(), - stream, debug_synchronous - ); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_DEVICE_DEVICE_SELECT_HPP_ diff --git a/hipcub/include/hipcub/rocprim/hipcub.hpp b/hipcub/include/hipcub/rocprim/hipcub.hpp deleted file mode 100644 index c5c28cdcf..000000000 --- a/hipcub/include/hipcub/rocprim/hipcub.hpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_HIPCUB_HPP_ -#define HIPCUB_ROCPRIM_HIPCUB_HPP_ - -#include "../config.hpp" - -#include "util_type.hpp" -#include "util_ptx.hpp" -#include "thread/thread_operators.hpp" - -// Iterator -#include "iterator/arg_index_input_iterator.hpp" -#include "iterator/counting_input_iterator.hpp" -#include "iterator/tex_obj_input_iterator.hpp" -#include "iterator/transform_input_iterator.hpp" - -// Warp -#include "warp/warp_reduce.hpp" -#include "warp/warp_scan.hpp" - -// Block -#include "block/block_discontinuity.hpp" -#include "block/block_exchange.hpp" -#include "block/block_histogram.hpp" -#include "block/block_load.hpp" -#include "block/block_radix_sort.hpp" -#include "block/block_reduce.hpp" -#include "block/block_scan.hpp" -#include "block/block_store.hpp" - -// Device -#include "device/device_histogram.hpp" -#include "device/device_radix_sort.hpp" -#include "device/device_reduce.hpp" -#include "device/device_run_length_encode.hpp" -#include "device/device_scan.hpp" -#include "device/device_segmented_radix_sort.hpp" -#include "device/device_segmented_reduce.hpp" -#include "device/device_select.hpp" - -#endif // HIPCUB_ROCPRIM_HIPCUB_HPP_ diff --git a/hipcub/include/hipcub/rocprim/iterator/arg_index_input_iterator.hpp b/hipcub/include/hipcub/rocprim/iterator/arg_index_input_iterator.hpp deleted file mode 100644 index 2dcfabe6a..000000000 --- a/hipcub/include/hipcub/rocprim/iterator/arg_index_input_iterator.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ -#define HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename InputIterator, - typename Difference = std::ptrdiff_t, - typename Value = typename std::iterator_traits::value_type -> -using ArgIndexInputIterator = ::rocprim::arg_index_iterator; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/rocprim/iterator/constant_input_iterator.hpp b/hipcub/include/hipcub/rocprim/iterator/constant_input_iterator.hpp deleted file mode 100644 index f8e4fe0b4..000000000 --- a/hipcub/include/hipcub/rocprim/iterator/constant_input_iterator.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ -#define HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename ValueType, - typename OffsetT = std::ptrdiff_t -> -using ConstantInputIterator = ::rocprim::constant_iterator; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/rocprim/iterator/counting_input_iterator.hpp b/hipcub/include/hipcub/rocprim/iterator/counting_input_iterator.hpp deleted file mode 100644 index 57faae823..000000000 --- a/hipcub/include/hipcub/rocprim/iterator/counting_input_iterator.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ -#define HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename ValueType, - typename OffsetT = std::ptrdiff_t -> -using CountingInputIterator = ::rocprim::counting_iterator; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/rocprim/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/rocprim/iterator/tex_obj_input_iterator.hpp deleted file mode 100644 index bcb1b2dc2..000000000 --- a/hipcub/include/hipcub/rocprim/iterator/tex_obj_input_iterator.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ -#define HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - typename OffsetT = std::ptrdiff_t -> -using TexObjInputIterator = ::rocprim::texture_cache_iterator; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/rocprim/iterator/transform_input_iterator.hpp b/hipcub/include/hipcub/rocprim/iterator/transform_input_iterator.hpp deleted file mode 100644 index 47b097ce2..000000000 --- a/hipcub/include/hipcub/rocprim/iterator/transform_input_iterator.hpp +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ -#define HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename ValueType, - typename ConversionOp, - typename InputIteratorT, - typename OffsetT = std::ptrdiff_t // ignored -> -using TransformInputIterator = ::rocprim::transform_iterator; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/rocprim/thread/thread_operators.hpp b/hipcub/include/hipcub/rocprim/thread/thread_operators.hpp deleted file mode 100644 index 0e3e48900..000000000 --- a/hipcub/include/hipcub/rocprim/thread/thread_operators.hpp +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIBCUB_ROCPRIM_THREAD_THREAD_OPERATORS_HPP_ -#define HIBCUB_ROCPRIM_THREAD_THREAD_OPERATORS_HPP_ - -#include "../../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -struct Equality -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a == b; - } -}; - -struct Inequality -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a != b; - } -}; - -template -struct InequalityWrapper -{ - EqualityOp op; - - HIPCUB_HOST_DEVICE inline - InequalityWrapper(EqualityOp op) : op(op) {} - - template - HIPCUB_HOST_DEVICE inline - bool operator()(const T &a, const T &b) - { - return !op(a, b); - } -}; - -struct Sum -{ - template - HIPCUB_HOST_DEVICE inline - constexpr T operator()(const T &a, const T &b) const - { - return a + b; - } -}; - -struct Max -{ - template - HIPCUB_HOST_DEVICE inline - constexpr T operator()(const T &a, const T &b) const - { - return a < b ? b : a; - } -}; - -struct Min -{ - template - HIPCUB_HOST_DEVICE inline - constexpr T operator()(const T &a, const T &b) const - { - return a < b ? a : b; - } -}; - -struct ArgMax -{ - template< - class Key, - class Value - > - HIPCUB_HOST_DEVICE inline - constexpr KeyValuePair - operator()(const KeyValuePair& a, - const KeyValuePair& b) const - { - return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; - } -}; - -struct ArgMin -{ - template< - class Key, - class Value - > - HIPCUB_HOST_DEVICE inline - constexpr KeyValuePair - operator()(const KeyValuePair& a, - const KeyValuePair& b) const - { - return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIBCUB_ROCPRIM_THREAD_THREAD_OPERATORS_HPP_ diff --git a/hipcub/include/hipcub/rocprim/util_ptx.hpp b/hipcub/include/hipcub/rocprim/util_ptx.hpp deleted file mode 100644 index 65be9ec3a..000000000 --- a/hipcub/include/hipcub/rocprim/util_ptx.hpp +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_UTIL_PTX_HPP_ -#define HIPCUB_ROCPRIM_UTIL_PTX_HPP_ - -#include -#include - -#include "../config.hpp" - -#define HIPCUB_WARP_THREADS ::rocprim::warp_size() -#define HIPCUB_ARCH 1 // ignored with rocPRIM backend - -BEGIN_HIPCUB_NAMESPACE - -// Missing compared to CUB: -// * ThreadExit - not supported -// * ThreadTrap - not supported -// * FFMA_RZ, FMUL_RZ - not in CUB public API -// * WARP_SYNC - not supported, not CUB public API -// * CTA_SYNC_AND - not supported, not CUB public API -// * MatchAny - not in CUB public API -// -// Differences: -// * Warp thread masks (when used) are 64-bit unsigned integers -// * member_mask argument is ignored in WARP_[ALL|ANY|BALLOT] funcs -// * Arguments first_lane, last_lane, and member_mask are ignored -// in Shuffle* funcs -// * count in BAR is ignored, BAR works like CTA_SYNC - -// ID functions etc. - -HIPCUB_DEVICE inline -int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z) -{ - return ((block_dim_z == 1) ? 0 : (hipThreadIdx_z * block_dim_x * block_dim_y)) - + ((block_dim_y == 1) ? 0 : (hipThreadIdx_y * block_dim_x)) - + hipThreadIdx_x; -} - -HIPCUB_DEVICE inline -unsigned int LaneId() -{ - return ::rocprim::lane_id(); -} - -HIPCUB_DEVICE inline -unsigned int WarpId() -{ - return ::rocprim::warp_id(); -} - -// Returns the warp lane mask of all lanes less than the calling thread -HIPCUB_DEVICE inline -uint64_t LaneMaskLt() -{ - return (uint64_t(1) << LaneId()) - 1; -} - -// Returns the warp lane mask of all lanes less than or equal to the calling thread -HIPCUB_DEVICE inline -uint64_t LaneMaskLe() -{ - return ((uint64_t(1) << LaneId()) << 1) - 1; -} - -// Returns the warp lane mask of all lanes greater than the calling thread -HIPCUB_DEVICE inline -uint64_t LaneMaskGt() -{ - return uint64_t(-1)^LaneMaskLe(); -} - -// Returns the warp lane mask of all lanes greater than or equal to the calling thread -HIPCUB_DEVICE inline -uint64_t LaneMaskGe() -{ - return uint64_t(-1)^LaneMaskLt(); -} - -// Shuffle funcs - -template < - int LOGICAL_WARP_THREADS, - typename T -> -HIPCUB_DEVICE inline -T ShuffleUp(T input, - int src_offset, - int first_thread, - unsigned int member_mask) -{ - // Not supproted in rocPRIM. - (void) first_thread; - // Member mask is not supported in rocPRIM, because it's - // not supported in ROCm. - (void) member_mask; - return ::rocprim::warp_shuffle_up( - input, src_offset, LOGICAL_WARP_THREADS - ); -} - -template < - int LOGICAL_WARP_THREADS, - typename T -> -HIPCUB_DEVICE inline -T ShuffleDown(T input, - int src_offset, - int last_thread, - unsigned int member_mask) -{ - // Not supproted in rocPRIM. - (void) last_thread; - // Member mask is not supported in rocPRIM, because it's - // not supported in ROCm. - (void) member_mask; - return ::rocprim::warp_shuffle_down( - input, src_offset, LOGICAL_WARP_THREADS - ); -} - -template < - int LOGICAL_WARP_THREADS, - typename T -> -HIPCUB_DEVICE inline -T ShuffleIndex(T input, - int src_lane, - unsigned int member_mask) -{ - // Member mask is not supported in rocPRIM, because it's - // not supported in ROCm. - (void) member_mask; - return ::rocprim::warp_shuffle( - input, src_lane, LOGICAL_WARP_THREADS - ); -} - -// Other - -HIPCUB_DEVICE inline -unsigned int SHR_ADD(unsigned int x, - unsigned int shift, - unsigned int addend) -{ - return (x >> shift) + addend; -} - -HIPCUB_DEVICE inline -unsigned int SHL_ADD(unsigned int x, - unsigned int shift, - unsigned int addend) -{ - return (x << shift) + addend; -} - -namespace detail { - -template -HIPCUB_DEVICE inline -auto unsigned_bit_extract(UnsignedBits source, - unsigned int bit_start, - unsigned int num_bits) - -> typename std::enable_if::type -{ - #ifdef __HIP_PLATFORM_HCC__ - #ifdef __HCC__ - using ::hc::__bitextract_u64; - #endif - return __bitextract_u64(source, bit_start, num_bits); - #else - return (source << (64 - bit_start - num_bits)) >> (64 - num_bits); - #endif // __HIP_PLATFORM_HCC__ -} - -template -HIPCUB_DEVICE inline -auto unsigned_bit_extract(UnsignedBits source, - unsigned int bit_start, - unsigned int num_bits) - -> typename std::enable_if::type -{ - #ifdef __HIP_PLATFORM_HCC__ - #ifdef __HCC__ - using ::hc::__bitextract_u32; - #endif - return __bitextract_u32(source, bit_start, num_bits); - #else - return (static_cast(source) << (32 - bit_start - num_bits)) >> (32 - num_bits); - #endif // __HIP_PLATFORM_HCC__ -} - -} // end namespace detail - -// Bitfield-extract. -// Extracts \p num_bits from \p source starting at bit-offset \p bit_start. -// The input \p source may be an 8b, 16b, 32b, or 64b unsigned integer type. -template -HIPCUB_DEVICE inline -unsigned int BFE(UnsignedBits source, - unsigned int bit_start, - unsigned int num_bits) -{ - static_assert(std::is_unsigned::value, "UnsignedBits must be unsigned"); - return detail::unsigned_bit_extract(source, bit_start, num_bits); -} - -// Bitfield insert. -// Inserts the \p num_bits least significant bits of \p y into \p x at bit-offset \p bit_start. -HIPCUB_DEVICE inline -void BFI(unsigned int &ret, - unsigned int x, - unsigned int y, - unsigned int bit_start, - unsigned int num_bits) -{ - #ifdef __HIP_PLATFORM_HCC__ - #ifdef __HCC__ - using ::hc::__bitinsert_u32; - #endif - ret = __bitinsert_u32(x, y, bit_start, num_bits); - #else - x <<= bit_start; - unsigned int MASK_X = ((1 << num_bits) - 1) << bit_start; - unsigned int MASK_Y = ~MASK_X; - ret = (y & MASK_Y) | (x & MASK_X); - #endif // __HIP_PLATFORM_HCC__ -} - -HIPCUB_DEVICE inline -unsigned int IADD3(unsigned int x, unsigned int y, unsigned int z) -{ - return x + y + z; -} - -HIPCUB_DEVICE inline -int PRMT(unsigned int a, unsigned int b, unsigned int index) -{ - return ::__byte_perm(a, b, index); -} - -HIPCUB_DEVICE inline -void BAR(int count) -{ - (void) count; - __syncthreads(); -} - -HIPCUB_DEVICE inline -void CTA_SYNC() -{ - __syncthreads(); -} - -HIPCUB_DEVICE inline -void WARP_SYNC(unsigned int member_mask) -{ - // Does nothing, on ROCm threads in warp are always in sync - (void) member_mask; -} - -HIPCUB_DEVICE inline -int WARP_ANY(int predicate, uint64_t member_mask) -{ - (void) member_mask; - return ::__any(predicate); -} - -HIPCUB_DEVICE inline -int WARP_ALL(int predicate, uint64_t member_mask) -{ - (void) member_mask; - return ::__all(predicate); -} - -HIPCUB_DEVICE inline -int64_t WARP_BALLOT(int predicate, uint64_t member_mask) -{ - (void) member_mask; - return __ballot(predicate); -} - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_UTIL_PTX_HPP_ diff --git a/hipcub/include/hipcub/rocprim/util_type.hpp b/hipcub/include/hipcub/rocprim/util_type.hpp deleted file mode 100644 index efbcdbb5f..000000000 --- a/hipcub/include/hipcub/rocprim/util_type.hpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_UTIL_TYPE_HPP_ -#define HIPCUB_ROCPRIM_UTIL_TYPE_HPP_ - -#include - -#include "../config.hpp" - -BEGIN_HIPCUB_NAMESPACE - -using NullType = ::rocprim::empty_type; - -template -struct If -{ - using Type = typename std::conditional::type; -}; - -template -struct IsPointer -{ - static constexpr bool VALUE = std::is_pointer::value; -}; - -template -struct IsVolatile -{ - static constexpr bool VALUE = std::is_volatile::value; -}; - -template -struct RemoveQualifiers -{ - using Type = typename std::remove_cv::type; -}; - -template -struct PowerOfTwo -{ - static constexpr bool VALUE = ::rocprim::detail::is_power_of_two(); -}; - -namespace detail -{ - -template -struct Log2Impl -{ - static constexpr int VALUE = Log2Impl> 1), COUNT + 1>::VALUE; -}; - -template -struct Log2Impl -{ - static constexpr int VALUE = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1; -}; - -} // end of detail namespace - -template -struct Log2 -{ - static_assert(N != 0, "The logarithm of zero is undefined"); - static constexpr int VALUE = detail::Log2Impl::VALUE; -}; - -template -struct DoubleBuffer -{ - T * d_buffers[2]; - - int selector; - - HIPCUB_HOST_DEVICE inline - DoubleBuffer() - { - selector = 0; - d_buffers[0] = nullptr; - d_buffers[1] = nullptr; - } - - HIPCUB_HOST_DEVICE inline - DoubleBuffer(T * d_current, T * d_alternate) - { - selector = 0; - d_buffers[0] = d_current; - d_buffers[1] = d_alternate; - } - - HIPCUB_HOST_DEVICE inline - T * Current() - { - return d_buffers[selector]; - } - - HIPCUB_HOST_DEVICE inline - T * Alternate() - { - return d_buffers[selector ^ 1]; - } -}; - -template< - class Key, - class Value -> -using KeyValuePair = ::rocprim::key_value_pair; - -namespace detail -{ - -template -inline -::rocprim::double_buffer to_double_buffer(DoubleBuffer& source) -{ - return ::rocprim::double_buffer(source.Current(), source.Alternate()); -} - -template -inline -void update_double_buffer(DoubleBuffer& target, ::rocprim::double_buffer& source) -{ - if(target.Current() != source.current()) - { - target.selector ^= 1; - } -} - -} - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_UTIL_TYPE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/warp/warp_reduce.hpp b/hipcub/include/hipcub/rocprim/warp/warp_reduce.hpp deleted file mode 100644 index bc7144b02..000000000 --- a/hipcub/include/hipcub/rocprim/warp/warp_reduce.hpp +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_WARP_WARP_REDUCE_HPP_ -#define HIPCUB_ROCPRIM_WARP_WARP_REDUCE_HPP_ - -#include "../../config.hpp" - -#include "../util_ptx.hpp" -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - int LOGICAL_WARP_THREADS = HIPCUB_WARP_THREADS, - int ARCH = HIPCUB_ARCH> -class WarpReduce : private ::rocprim::warp_reduce -{ - static_assert(LOGICAL_WARP_THREADS > 0, "LOGICAL_WARP_THREADS must be greater than 0"); - using base_type = typename ::rocprim::warp_reduce; - - typename base_type::storage_type &temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - WarpReduce(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - HIPCUB_DEVICE inline - T Sum(T input) - { - base_type::reduce(input, input, temp_storage_); - return input; - } - - HIPCUB_DEVICE inline - T Sum(T input, int valid_items) - { - base_type::reduce(input, input, valid_items, temp_storage_); - return input; - } - - template - HIPCUB_DEVICE inline - T HeadSegmentedSum(T input, FlagT head_flag) - { - base_type::head_segmented_reduce(input, input, head_flag, temp_storage_); - return input; - } - - template - HIPCUB_DEVICE inline - T TailSegmentedSum(T input, FlagT tail_flag) - { - base_type::tail_segmented_reduce(input, input, tail_flag, temp_storage_); - return input; - } - - template - HIPCUB_DEVICE inline - T Reduce(T input, ReduceOp reduce_op) - { - base_type::reduce(input, input, temp_storage_, reduce_op); - return input; - } - - template - HIPCUB_DEVICE inline - T Reduce(T input, ReduceOp reduce_op, int valid_items) - { - base_type::reduce(input, input, valid_items, temp_storage_, reduce_op); - return input; - } - - template - HIPCUB_DEVICE inline - T HeadSegmentedReduce(T input, FlagT head_flag, ReduceOp reduce_op) - { - base_type::head_segmented_reduce( - input, input, head_flag, temp_storage_, reduce_op - ); - return input; - } - - template - HIPCUB_DEVICE inline - T TailSegmentedReduce(T input, FlagT tail_flag, ReduceOp reduce_op) - { - base_type::tail_segmented_reduce( - input, input, tail_flag, temp_storage_, reduce_op - ); - return input; - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_WARP_WARP_REDUCE_HPP_ diff --git a/hipcub/include/hipcub/rocprim/warp/warp_scan.hpp b/hipcub/include/hipcub/rocprim/warp/warp_scan.hpp deleted file mode 100644 index 5945f7e32..000000000 --- a/hipcub/include/hipcub/rocprim/warp/warp_scan.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef HIPCUB_ROCPRIM_WARP_WARP_SCAN_HPP_ -#define HIPCUB_ROCPRIM_WARP_WARP_SCAN_HPP_ - -#include "../../config.hpp" - -#include "../util_ptx.hpp" -#include "../thread/thread_operators.hpp" - -BEGIN_HIPCUB_NAMESPACE - -template< - typename T, - int LOGICAL_WARP_THREADS = HIPCUB_WARP_THREADS, - int ARCH = HIPCUB_ARCH> -class WarpScan : private ::rocprim::warp_scan -{ - static_assert(LOGICAL_WARP_THREADS > 0, "LOGICAL_WARP_THREADS must be greater than 0"); - using base_type = typename ::rocprim::warp_scan; - - typename base_type::storage_type &temp_storage_; - -public: - using TempStorage = typename base_type::storage_type; - - HIPCUB_DEVICE inline - WarpScan(TempStorage& temp_storage) : temp_storage_(temp_storage) - { - } - - HIPCUB_DEVICE inline - void InclusiveSum(T input, T& inclusive_output) - { - base_type::inclusive_scan(input, inclusive_output, temp_storage_); - } - - HIPCUB_DEVICE inline - void InclusiveSum(T input, T& inclusive_output, T& warp_aggregate) - { - base_type::inclusive_scan(input, inclusive_output, warp_aggregate, temp_storage_); - } - - HIPCUB_DEVICE inline - void ExclusiveSum(T input, T& exclusive_output) - { - base_type::exclusive_scan(input, exclusive_output, T(0), temp_storage_); - } - - HIPCUB_DEVICE inline - void ExclusiveSum(T input, T& exclusive_output, T& warp_aggregate) - { - base_type::exclusive_scan(input, exclusive_output, T(0), warp_aggregate, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T input, T& inclusive_output, ScanOp scan_op) - { - base_type::inclusive_scan(input, inclusive_output, temp_storage_, scan_op); - } - - template - HIPCUB_DEVICE inline - void InclusiveScan(T input, T& inclusive_output, ScanOp scan_op, T& warp_aggregate) - { - base_type::inclusive_scan( - input, inclusive_output, warp_aggregate, - temp_storage_, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& exclusive_output, ScanOp scan_op) - { - base_type::inclusive_scan(input, exclusive_output, temp_storage_, scan_op); - base_type::to_exclusive(exclusive_output, exclusive_output, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& exclusive_output, T initial_value, ScanOp scan_op) - { - base_type::exclusive_scan( - input, exclusive_output, initial_value, - temp_storage_, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& exclusive_output, ScanOp scan_op, T& warp_aggregate) - { - base_type::inclusive_scan( - input, exclusive_output, warp_aggregate, temp_storage_, scan_op - ); - base_type::to_exclusive(exclusive_output, exclusive_output, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void ExclusiveScan(T input, T& exclusive_output, T initial_value, ScanOp scan_op, T& warp_aggregate) - { - base_type::exclusive_scan( - input, exclusive_output, initial_value, warp_aggregate, - temp_storage_, scan_op - ); - } - - template - HIPCUB_DEVICE inline - void Scan(T input, T& inclusive_output, T& exclusive_output, ScanOp scan_op) - { - base_type::inclusive_scan(input, inclusive_output, temp_storage_, scan_op); - base_type::to_exclusive(inclusive_output, exclusive_output, temp_storage_); - } - - template - HIPCUB_DEVICE inline - void Scan(T input, T& inclusive_output, T& exclusive_output, T initial_value, ScanOp scan_op) - { - base_type::scan( - input, inclusive_output, exclusive_output, initial_value, - temp_storage_, scan_op - ); - // In CUB documentation it's unclear if inclusive_output should include initial_value, - // however,the implementation includes initial_value in inclusive_output in WarpScan::Scan(). - // In rocPRIM it's not included, and this is a fix to match CUB implementation. - // After confirmation from CUB's developers we will most probably change rocPRIM too. - inclusive_output = scan_op(initial_value, inclusive_output); - } - - HIPCUB_DEVICE inline - T Broadcast(T input, unsigned int src_lane) - { - return base_type::broadcast(input, src_lane, temp_storage_); - } -}; - -END_HIPCUB_NAMESPACE - -#endif // HIPCUB_ROCPRIM_WARP_WARP_SCAN_HPP_ diff --git a/rocprim/include/rocprim/block/detail/block_reduce_raking_reduce.hpp b/rocprim/include/rocprim/block/detail/block_reduce_raking_reduce.hpp index d2acd5fbf..24b6c6b00 100644 --- a/rocprim/include/rocprim/block/detail/block_reduce_raking_reduce.hpp +++ b/rocprim/include/rocprim/block/detail/block_reduce_raking_reduce.hpp @@ -161,7 +161,6 @@ class block_reduce_raking_reduce if (flat_tid < warp_size_) { T thread_reduction = storage_.threads[flat_tid]; - #pragma unroll for(unsigned int i = warp_size_ + flat_tid; i < BlockSize; i += warp_size_) { thread_reduction = reduce_op( @@ -217,7 +216,6 @@ class block_reduce_raking_reduce if (flat_tid < warp_size_) { T thread_reduction = storage_.threads[flat_tid]; - #pragma unroll for(unsigned int i = warp_size_ + flat_tid; i < BlockSize; i += warp_size_) { if(i < valid_items) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4f56a3ec7..808f0a152 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -92,17 +92,8 @@ endfunction() # **************************************************************************** # HC and HIP tests without using rocPRIM -if(HIP_PLATFORM STREQUAL "hcc") - add_hip_test("hip.device_api" test_hip_api.cpp) - if(HIP_COMPILER STREQUAL "hcc") - add_hc_test ("hc.device_api" test_hc_api.cpp) - endif() -endif() +add_hip_test("hip.device_api" test_hip_api.cpp) +add_hc_test ("hc.device_api" test_hc_api.cpp) # rocPRIM test (run only on ROCm/hcc) -if(HIP_PLATFORM STREQUAL "hcc") - add_subdirectory(rocprim) -endif() - -# hipCUB tests -add_subdirectory(hipcub) +add_subdirectory(rocprim) diff --git a/test/extra/CMakeLists.txt b/test/extra/CMakeLists.txt index 0af44f594..172fb8b68 100644 --- a/test/extra/CMakeLists.txt +++ b/test/extra/CMakeLists.txt @@ -35,42 +35,8 @@ list(APPEND CMAKE_MODULE_PATH # Verify that hcc compiler is used on ROCM platform include(VerifyCompiler) -# Download CUB -include(DownloadProject) -if(HIP_PLATFORM STREQUAL "nvcc") - if(NOT DEFINED CUB_INCLUDE_DIR) - file( - DOWNLOAD https://github.com/NVlabs/cub/archive/1.8.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip - STATUS cub_download_status LOG cub_download_log - ) - list(GET cub_download_status 0 cub_download_error_code) - if(cub_download_error_code) - message(FATAL_ERROR "Error: downloading " - "https://github.com/NVlabs/cub/archive/1.8.0.zip failed " - "error_code: ${cub_download_error_code} " - "log: ${cub_download_log} " - ) - endif() - - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE cub_unpack_error_code - ) - if(cub_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip failed") - endif() - set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0/ CACHE PATH "") - endif() -endif() - -if(HIP_PLATFORM STREQUAL "hcc") - # Find rocPRIM - find_package(rocprim REQUIRED CONFIG HINTS ${rocprim_DIR} PATHS "/opt/rocm/rocprim") -endif() -# Find hipCUB -find_package(hipcub REQUIRED CONFIG HINTS ${hipcub_DIR} PATHS "/opt/rocm/hipcub") +# Find rocPRIM +find_package(rocprim REQUIRED CONFIG HINTS ${rocprim_DIR} PATHS "/opt/rocm/rocprim") # Build CXX flags set(CMAKE_CXX_STANDARD 11) @@ -102,50 +68,5 @@ function(add_rocprim_test TEST_NAME TEST_SOURCES) add_test(${TEST_NAME} ${TEST_TARGET}) endfunction() -# Use CUDA_INCLUDE_DIRECTORIES to include required dirs -# for nvcc if cmake version is less than 3.10 -if((HIP_PLATFORM STREQUAL "nvcc") AND (CMAKE_VERSION VERSION_LESS "3.10")) - CUDA_INCLUDE_DIRECTORIES( - ${hipcub_INCLUDE_DIR} - ${GTEST_INCLUDE_DIRS} - ${CUB_INCLUDE_DIR} - ) -endif() - -function(add_hipcub_test TEST_NAME TEST_SOURCES) - list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) - get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - if(HIP_PLATFORM STREQUAL "hcc") - add_executable(${TEST_TARGET} ${TEST_SOURCES}) - target_link_libraries(${TEST_TARGET} - PRIVATE - hip::hipcub - ) - foreach(amdgpu_target ${AMDGPU_TARGETS}) - target_link_libraries(${TEST_TARGET} - PRIVATE - --amdgpu-target=${amdgpu_target} - ) - endforeach() - else() # CUDA/nvcc - set_source_files_properties(${TEST_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ - ) - CUDA_ADD_EXECUTABLE(${TEST_TARGET} - ${TEST_SOURCES} - OPTIONS - --expt-extended-lambda - ) - target_link_libraries(${TEST_TARGET} hip::hipcub) - target_include_directories(${TEST_TARGET} SYSTEM PRIVATE ${CUB_INCLUDE_DIR}) - endif() - add_test(${TEST_NAME} ${TEST_TARGET}) -endfunction() -# rocPRIM package test -if(HIP_PLATFORM STREQUAL "hcc") - add_rocprim_test("test_rocprim_package" test_rocprim_package.cpp) -endif() -# hipCUB package test -add_hipcub_test("test_hipcub_package" test_hipcub_package.cpp) +add_rocprim_test("test_rocprim_package" test_rocprim_package.cpp) diff --git a/test/extra/test_hipcub_package.cpp b/test/extra/test_hipcub_package.cpp deleted file mode 100644 index e240fcd35..000000000 --- a/test/extra/test_hipcub_package.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include - -#include -#include - -#define HIP_CHECK(error) \ - { \ - if(error != hipSuccess){ \ - std::cout << error << std::endl; \ - exit(error); \ - } \ - } - -int main(int, char**) -{ - using T = unsigned int; - - // host input/output - const size_t size = 1024 * 256; - std::vector input(size, 1); - T output = 0; - - // device input/output - T * d_input; - T * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, sizeof(T))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - auto expected = std::accumulate(input.begin(), input.end(), 0U); - - // Temporary storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceReduce::Sum( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size() - ) - ); - - // Allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceReduce::Sum( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size() - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - &output, d_output, - sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - if(output != expected) - { - std::cout - << "Failure: output (" << output - << ") != expected (" << expected << ")" - << std::endl; - return 1; - } - return 0; -} \ No newline at end of file diff --git a/test/hipcub/CMakeLists.txt b/test/hipcub/CMakeLists.txt deleted file mode 100644 index c18d8b8bb..000000000 --- a/test/hipcub/CMakeLists.txt +++ /dev/null @@ -1,106 +0,0 @@ -# MIT License -# -# Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# Use CUDA_INCLUDE_DIRECTORIES to include required dirs -# for nvcc if cmake version is less than 3.10 -if((HIP_PLATFORM STREQUAL "nvcc") AND (CMAKE_VERSION VERSION_LESS "3.10")) - CUDA_INCLUDE_DIRECTORIES( - "${PROJECT_BINARY_DIR}/hipcub/include/hipcub" - "${PROJECT_BINARY_DIR}/hipcub/include" - "${PROJECT_SOURCE_DIR}/hipcub/include" - ${GTEST_INCLUDE_DIRS} - ${CUB_INCLUDE_DIR} - ) -endif() - -function(add_hipcub_test TEST_NAME TEST_SOURCES) - list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) - get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - if(HIP_PLATFORM STREQUAL "hcc") - add_executable(${TEST_TARGET} ${TEST_SOURCES}) - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - ${GTEST_INCLUDE_DIRS} - ) - target_link_libraries(${TEST_TARGET} - PRIVATE - hipcub - ${GTEST_BOTH_LIBRARIES} - ) - foreach(amdgpu_target ${AMDGPU_TARGETS}) - target_link_libraries(${TEST_TARGET} - PRIVATE - --amdgpu-target=${amdgpu_target} - ) - endforeach() - else() - set_source_files_properties(${TEST_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ - ) - CUDA_ADD_EXECUTABLE(${TEST_TARGET} - ${TEST_SOURCES} - OPTIONS - --expt-extended-lambda - ) - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - ${GTEST_INCLUDE_DIRS} - ) - target_link_libraries(${TEST_TARGET} - hipcub_cub - ${GTEST_BOTH_LIBRARIES} - ) - endif() - set_target_properties(${TEST_TARGET} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/hipcub" - ) - add_test(${TEST_NAME} ${TEST_TARGET}) -endfunction() - -# **************************************************************************** -# Tests -# **************************************************************************** - -# HIP basic test, which also checks if there are no linkage problems when there are multiple sources -add_hipcub_test("hipcub.BasicTest" "test_hipcub_basic.cpp;detail/get_hipcub_version.cpp") - -add_hipcub_test("hipcub.BlockDiscontinuity" test_hipcub_block_discontinuity.cpp) -add_hipcub_test("hipcub.BlockExchange" test_hipcub_block_exchange.cpp) -add_hipcub_test("hipcub.BlockHistogram" test_hipcub_block_histogram.cpp) -add_hipcub_test("hipcub.BlockLoadStore" test_hipcub_block_load_store.cpp) -add_hipcub_test("hipcub.BlockRadixSort" test_hipcub_block_radix_sort.cpp) -add_hipcub_test("hipcub.BlockReduce" test_hipcub_block_reduce.cpp) -add_hipcub_test("hipcub.BlockScan" test_hipcub_block_scan.cpp) -add_hipcub_test("hipcub.DeviceHistogram" test_hipcub_device_histogram.cpp) -add_hipcub_test("hipcub.DeviceRadixSort" test_hipcub_device_radix_sort.cpp) -add_hipcub_test("hipcub.DeviceReduce" test_hipcub_device_reduce.cpp) -add_hipcub_test("hipcub.DeviceRunLengthEncode" test_hipcub_device_run_length_encode.cpp) -add_hipcub_test("hipcub.DeviceReduceByKey" test_hipcub_device_reduce_by_key.cpp) -add_hipcub_test("hipcub.DeviceScan" test_hipcub_device_scan.cpp) -add_hipcub_test("hipcub.DeviceSegmentedRadixSort" test_hipcub_device_segmented_radix_sort.cpp) -add_hipcub_test("hipcub.DeviceSegmentedReduce" test_hipcub_device_segmented_reduce.cpp) -add_hipcub_test("hipcub.DeviceSelect" test_hipcub_device_select.cpp) -add_hipcub_test("hipcub.UtilPtx" test_hipcub_util_ptx.cpp) -add_hipcub_test("hipcub.WarpReduce" test_hipcub_warp_reduce.cpp) -add_hipcub_test("hipcub.WarpScan" test_hipcub_warp_scan.cpp) diff --git a/test/hipcub/detail/get_hipcub_version.cpp b/test/hipcub/detail/get_hipcub_version.cpp deleted file mode 100644 index 60d247976..000000000 --- a/test/hipcub/detail/get_hipcub_version.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "get_hipcub_version.hpp" - -__global__ -void get_version_kernel(unsigned int * version) -{ - *version = HIPCUB_VERSION; -} - -unsigned int get_hipcub_version_on_device() -{ - unsigned int version = 0; - - unsigned int * d_version; - HIP_CHECK(hipMalloc(&d_version, sizeof(unsigned int))); - HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - get_version_kernel, - dim3(1), dim3(1), 0, 0, - d_version - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - HIP_CHECK( - hipMemcpy( - &version, d_version, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipFree(d_version)); - - return version; -} diff --git a/test/hipcub/detail/get_hipcub_version.hpp b/test/hipcub/detail/get_hipcub_version.hpp deleted file mode 100644 index 3b9428a8e..000000000 --- a/test/hipcub/detail/get_hipcub_version.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#ifndef HIPCUB_TEST_DETAIL_GET_HIPCUB_VERSION_HPP_ -#define HIPCUB_TEST_DETAIL_GET_HIPCUB_VERSION_HPP_ - -#include -#include - -// hipCUB API -#include - -#define HIP_CHECK(condition) \ -{ \ - hipError_t error = condition; \ - if(error != hipSuccess){ \ - std::cout << "HIP error: " << error << " line: " << __LINE__ << std::endl; \ - exit(error); \ - } \ -} - -unsigned int get_hipcub_version_on_device(); - -#endif // HIPCUB_TEST_DETAIL_GET_HIPCUB_VERSION_HPP_ diff --git a/test/hipcub/test_hipcub_basic.cpp b/test/hipcub/test_hipcub_basic.cpp deleted file mode 100644 index 1fafa8a72..000000000 --- a/test/hipcub/test_hipcub_basic.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include - -// Google Test -#include - -// hipCUB API -#include - -#include "detail/get_hipcub_version.hpp" - -// get_hipcub_version_on_device is compiled in a separate source, -// that way we can be sure that all hipCUB functions are inline -// and there won't be any multiple definitions error -TEST(HipcubBasicTests, GetVersionOnDevice) -{ - int version = get_hipcub_version_on_device(); - ASSERT_EQ(version, HIPCUB_VERSION); -} diff --git a/test/hipcub/test_hipcub_block_discontinuity.cpp b/test/hipcub/test_hipcub_block_discontinuity.cpp deleted file mode 100644 index 43c3571be..000000000 --- a/test/hipcub/test_hipcub_block_discontinuity.cpp +++ /dev/null @@ -1,580 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class T, - class Flag, - unsigned int BlockSize, - unsigned int ItemsPerThread, - class FlagOp -> -struct params -{ - using type = T; - using flag_type = Flag; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; - using flag_op_type = FlagOp; -}; - -template -class HipcubBlockDiscontinuity : public ::testing::Test { -public: - using params = Params; -}; - -template -struct custom_flag_op1 -{ - HIPCUB_HOST_DEVICE - bool operator()(const T& a, const T& b) - { - return (a == b); - } -}; - -template -struct custom_flag_op2 -{ - HIPCUB_HOST_DEVICE - bool operator()(const T& a, const T& b) const - { - return (a - b > 5); - } -}; - -struct less -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a < b; - } -}; - -struct less_equal -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a <= b; - } -}; - -struct greater -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a > b; - } -}; - -struct greater_equal -{ - template - HIPCUB_HOST_DEVICE inline - constexpr bool operator()(const T& a, const T& b) const - { - return a >= b; - } -}; - -template -bool apply(FlagOp flag_op, const T& a, const T& b, unsigned int) -{ - return flag_op(a, b); -} - -typedef ::testing::Types< - // Power of 2 BlockSize - params, - params, - params, - params, - params >, - - // Non-power of 2 BlockSize - params, - params >, - params, - params, - params, - - // Power of 2 BlockSize and ItemsPerThread > 1 - params >, - params, - params >, - params, - - // Non-power of 2 BlockSize and ItemsPerThread > 1 - params >, - params, - params, - params > -> Params; - -TYPED_TEST_CASE(HipcubBlockDiscontinuity, Params); - -template< - class Type, - class FlagType, - class FlagOpType, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void flag_heads_kernel(Type* device_input, long long* device_heads) -{ - const unsigned int lid = hipThreadIdx_x; - const unsigned int items_per_block = BlockSize * ItemsPerThread; - const unsigned int block_offset = hipBlockIdx_x * items_per_block; - - Type input[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockDiscontinuity bdiscontinuity; - - FlagType head_flags[ItemsPerThread]; - if(hipBlockIdx_x % 2 == 1) - { - const Type tile_predecessor_item = device_input[block_offset - 1]; - bdiscontinuity.FlagHeads(head_flags, input, FlagOpType(), tile_predecessor_item); - } - else - { - bdiscontinuity.FlagHeads(head_flags, input, FlagOpType()); - } - - hipcub::StoreDirectBlocked(lid, device_heads + block_offset, head_flags); -} - -TYPED_TEST(HipcubBlockDiscontinuity, FlagHeads) -{ - using type = typename TestFixture::params::type; - // std::vector is a special case that will cause an error in hipMemcpy - // http://en.cppreference.com/w/cpp/container/vector_bool - using stored_flag_type = typename std::conditional< - std::is_same::value, - int, - typename TestFixture::params::flag_type - >::type; - using flag_type = typename TestFixture::params::flag_type; - using flag_op_type = typename TestFixture::params::flag_op_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 2048; - constexpr size_t grid_size = size / items_per_block; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, type(0), type(10)); - std::vector heads(size); - - // Calculate expected results on host - std::vector expected_heads(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ii = 0; ii < items_per_block; ii++) - { - const size_t i = bi * items_per_block + ii; - if(ii == 0) - { - expected_heads[i] = bi % 2 == 1 - ? apply(flag_op, input[i - 1], input[i], ii) - : flag_type(true); - } - else - { - expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); - } - } - } - - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_heads; - HIP_CHECK(hipMalloc(&device_heads, heads.size() * sizeof(typename decltype(heads)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_heads - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - heads.data(), device_heads, - heads.size() * sizeof(typename decltype(heads)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(heads[i], expected_heads[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_heads)); -} - -template< - class Type, - class FlagType, - class FlagOpType, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void flag_tails_kernel(Type* device_input, long long* device_tails) -{ - const unsigned int lid = hipThreadIdx_x; - const unsigned int items_per_block = BlockSize * ItemsPerThread; - const unsigned int block_offset = hipBlockIdx_x * items_per_block; - - Type input[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockDiscontinuity bdiscontinuity; - - FlagType tail_flags[ItemsPerThread]; - if(hipBlockIdx_x % 2 == 0) - { - const Type tile_successor_item = device_input[block_offset + items_per_block]; - bdiscontinuity.FlagTails(tail_flags, input, FlagOpType(), tile_successor_item); - } - else - { - bdiscontinuity.FlagTails(tail_flags, input, FlagOpType()); - } - - hipcub::StoreDirectBlocked(lid, device_tails + block_offset, tail_flags); -} - -TYPED_TEST(HipcubBlockDiscontinuity, FlagTails) -{ - using type = typename TestFixture::params::type; - // std::vector is a special case that will cause an error in hipMemcpy - // http://en.cppreference.com/w/cpp/container/vector_bool - using stored_flag_type = typename std::conditional< - std::is_same::value, - int, - typename TestFixture::params::flag_type - >::type; - using flag_type = typename TestFixture::params::flag_type; - using flag_op_type = typename TestFixture::params::flag_op_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 2048; - constexpr size_t grid_size = size / items_per_block; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, type(0), type(10)); - std::vector tails(size); - - // Calculate expected results on host - std::vector expected_tails(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ii = 0; ii < items_per_block; ii++) - { - const size_t i = bi * items_per_block + ii; - if(ii == items_per_block - 1) - { - expected_tails[i] = bi % 2 == 0 - ? apply(flag_op, input[i], input[i + 1], ii + 1) - : flag_type(true); - } - else - { - expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); - } - } - } - - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_tails; - HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_tails_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_tails - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - tails.data(), device_tails, - tails.size() * sizeof(typename decltype(tails)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(tails[i], expected_tails[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_tails)); -} - -template< - class Type, - class FlagType, - class FlagOpType, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void flag_heads_and_tails_kernel(Type* device_input, long long* device_heads, long long* device_tails) -{ - const unsigned int lid = hipThreadIdx_x; - const unsigned int items_per_block = BlockSize * ItemsPerThread; - const unsigned int block_offset = hipBlockIdx_x * items_per_block; - - Type input[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockDiscontinuity bdiscontinuity; - - FlagType head_flags[ItemsPerThread]; - FlagType tail_flags[ItemsPerThread]; - if(hipBlockIdx_x % 4 == 0) - { - const Type tile_successor_item = device_input[block_offset + items_per_block]; - bdiscontinuity.FlagHeadsAndTails(head_flags, tail_flags, tile_successor_item, input, FlagOpType()); - } - else if(hipBlockIdx_x % 4 == 1) - { - const Type tile_predecessor_item = device_input[block_offset - 1]; - const Type tile_successor_item = device_input[block_offset + items_per_block]; - bdiscontinuity.FlagHeadsAndTails(head_flags, tile_predecessor_item, tail_flags, tile_successor_item, input, FlagOpType()); - } - else if(hipBlockIdx_x % 4 == 2) - { - const Type tile_predecessor_item = device_input[block_offset - 1]; - bdiscontinuity.FlagHeadsAndTails(head_flags, tile_predecessor_item, tail_flags, input, FlagOpType()); - } - else if(hipBlockIdx_x % 4 == 3) - { - bdiscontinuity.FlagHeadsAndTails(head_flags, tail_flags, input, FlagOpType()); - } - - hipcub::StoreDirectBlocked(lid, device_heads + block_offset, head_flags); - hipcub::StoreDirectBlocked(lid, device_tails + block_offset, tail_flags); -} - -TYPED_TEST(HipcubBlockDiscontinuity, FlagHeadsAndTails) -{ - using type = typename TestFixture::params::type; - // std::vector is a special case that will cause an error in hipMemcpy - // http://en.cppreference.com/w/cpp/container/vector_bool - using stored_flag_type = typename std::conditional< - std::is_same::value, - int, - typename TestFixture::params::flag_type - >::type; - using flag_type = typename TestFixture::params::flag_type; - using flag_op_type = typename TestFixture::params::flag_op_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 2048; - constexpr size_t grid_size = size / items_per_block; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, type(0), type(10)); - std::vector heads(size); - std::vector tails(size); - - // Calculate expected results on host - std::vector expected_heads(size); - std::vector expected_tails(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ii = 0; ii < items_per_block; ii++) - { - const size_t i = bi * items_per_block + ii; - if(ii == 0) - { - expected_heads[i] = (bi % 4 == 1 || bi % 4 == 2) - ? apply(flag_op, input[i - 1], input[i], ii) - : flag_type(true); - } - else - { - expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); - } - if(ii == items_per_block - 1) - { - expected_tails[i] = (bi % 4 == 0 || bi % 4 == 1) - ? apply(flag_op, input[i], input[i + 1], ii + 1) - : flag_type(true); - } - else - { - expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); - } - } - } - - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_heads; - HIP_CHECK(hipMalloc(&device_heads, tails.size() * sizeof(typename decltype(heads)::value_type))); - long long* device_tails; - HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_and_tails_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_heads, device_tails - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - heads.data(), device_heads, - heads.size() * sizeof(typename decltype(heads)::value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - tails.data(), device_tails, - tails.size() * sizeof(typename decltype(tails)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(heads[i], expected_heads[i]); - ASSERT_EQ(tails[i], expected_tails[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_heads)); - HIP_CHECK(hipFree(device_tails)); -} - diff --git a/test/hipcub/test_hipcub_block_exchange.cpp b/test/hipcub/test_hipcub_block_exchange.cpp deleted file mode 100644 index 2a2ff63c9..000000000 --- a/test/hipcub/test_hipcub_block_exchange.cpp +++ /dev/null @@ -1,797 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class T, - class U, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -struct params -{ - using type = T; - using output_type = U; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; -}; - -template -class HipcubBlockExchangeTests : public ::testing::Test { -public: - using params = Params; -}; - -template -struct dummy -{ - T x; - T y; - -#ifdef HIPCUB_ROCPRIM_API - HIPCUB_HOST_DEVICE -#endif - dummy() = default; - - template - HIPCUB_HOST_DEVICE - dummy(U a) : x(a + 1), y(a * 2) { } - - HIPCUB_HOST_DEVICE - bool operator==(const dummy& rhs) const - { - return x == rhs.x && y == rhs.y; - } -}; - -typedef ::testing::Types< - // Power of 2 BlockSize and ItemsPerThread = 1 (no rearrangement) - params, - params, - params, - params, 256, 1>, - params, - - // Power of 2 BlockSize and ItemsPerThread > 1 - params, - params, - params, - params, 128, 7>, - params, - params, - - // Non-power of 2 BlockSize and ItemsPerThread > 1 - params, - params, 464U, 2>, - params, - params -> Params; - -TYPED_TEST_CASE(HipcubBlockExchangeTests, Params); - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void blocked_to_striped_kernel(Type* device_input, OutputType* device_output) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockExchange exchange; - exchange.BlockedToStriped(input, output); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, BlockedToStriped) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - - // Calculate input and expected results on host - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ti = 0; ti < block_size; ti++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block; - const size_t i0 = offset + ti * items_per_thread + ii; - const size_t i1 = offset + ii * block_size + ti; - input[i1] = values[i1]; - expected[i0] = values[i1]; - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(blocked_to_striped_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void striped_to_blocked_kernel(Type* device_input, OutputType* device_output) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockExchange exchange; - exchange.StripedToBlocked(input, output); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, StripedToBlocked) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - - // Calculate input and expected results on host - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ti = 0; ti < block_size; ti++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block; - const size_t i0 = offset + ti * items_per_thread + ii; - const size_t i1 = offset + ii * block_size + ti; - input[i0] = values[i1]; - expected[i1] = values[i1]; - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(striped_to_blocked_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void blocked_to_warp_striped_kernel(Type* device_input, OutputType* device_output) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockExchange exchange; - exchange.BlockedToWarpStriped(input, output); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, BlockedToWarpStriped) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - bool is_block_size_unsupported = block_size > test_utils::get_max_block_size(); -#ifdef HIPCUB_CUB_API - // CUB does not support exchanges to/from warp-striped arrangements - // for incomplete blocks (not divisible by warp size) - // Workaround for nvcc warning: "dynamic initialization in unreachable code" - // (not a simple if with compile-time expression) - is_block_size_unsupported |= block_size % HIPCUB_WARP_THREADS != 0; -#endif - if(is_block_size_unsupported) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - - constexpr size_t warp_size = test_utils::get_min_warp_size(block_size, size_t(HIPCUB_WARP_THREADS)); - constexpr size_t warps_no = (block_size + warp_size - 1) / warp_size; - constexpr size_t items_per_warp = warp_size * items_per_thread; - - // Calculate input and expected results on host - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t wi = 0; wi < warps_no; wi++) - { - const size_t current_warp_size = wi == warps_no - 1 - ? (block_size % warp_size != 0 ? block_size % warp_size : warp_size) - : warp_size; - for(size_t li = 0; li < current_warp_size; li++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block + wi * items_per_warp; - const size_t i0 = offset + li * items_per_thread + ii; - const size_t i1 = offset + ii * current_warp_size + li; - input[i1] = values[i1]; - expected[i0] = values[i1]; - } - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(blocked_to_warp_striped_kernel< - type, output_type, items_per_block, items_per_thread - >), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void warp_striped_to_blocked_kernel(Type* device_input, OutputType* device_output) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - - hipcub::BlockExchange exchange; - exchange.WarpStripedToBlocked(input, output); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, WarpStripedToBlocked) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - bool is_block_size_unsupported = block_size > test_utils::get_max_block_size(); -#ifdef HIPCUB_CUB_API - // CUB does not support exchanges to/from warp-striped arrangements - // for incomplete blocks (not divisible by warp size) - // Workaround for nvcc warning: "dynamic initialization in unreachable code" - // (not a simple if with compile-time expression) - is_block_size_unsupported |= block_size % HIPCUB_WARP_THREADS != 0; -#endif - if(is_block_size_unsupported) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - - constexpr size_t warp_size = test_utils::get_min_warp_size(block_size, size_t(HIPCUB_WARP_THREADS)); - constexpr size_t warps_no = (block_size + warp_size - 1) / warp_size; - constexpr size_t items_per_warp = warp_size * items_per_thread; - - // Calculate input and expected results on host - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t wi = 0; wi < warps_no; wi++) - { - const size_t current_warp_size = wi == warps_no - 1 - ? (block_size % warp_size != 0 ? block_size % warp_size : warp_size) - : warp_size; - for(size_t li = 0; li < current_warp_size; li++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block + wi * items_per_warp; - const size_t i0 = offset + li * items_per_thread + ii; - const size_t i1 = offset + ii * current_warp_size + li; - input[i0] = values[i1]; - expected[i1] = values[i1]; - } - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_striped_to_blocked_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void scatter_to_blocked_kernel(Type* device_input, OutputType* device_output, unsigned int* device_ranks) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - unsigned int ranks[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - hipcub::LoadDirectBlocked(lid, device_ranks + block_offset, ranks); - - hipcub::BlockExchange exchange; - exchange.ScatterToBlocked(input, output, ranks); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, ScatterToBlocked) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - std::vector ranks(size); - - // Calculate input and expected results on host - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - auto block_ranks = ranks.begin() + bi * items_per_block; - std::iota(block_ranks, block_ranks + items_per_block, 0); - std::shuffle(block_ranks, block_ranks + items_per_block, std::mt19937{std::random_device{}()}); - } - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ti = 0; ti < block_size; ti++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block; - const size_t i0 = offset + ti * items_per_thread + ii; - const size_t i1 = offset + ranks[i0]; - input[i0] = values[i0]; - expected[i1] = values[i0]; - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - unsigned int* device_ranks; - HIP_CHECK(hipMalloc(&device_ranks, ranks.size() * sizeof(typename decltype(ranks)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_ranks, ranks.data(), - ranks.size() * sizeof(unsigned int), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(scatter_to_blocked_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_ranks - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_ranks)); -} - -template< - class Type, - class OutputType, - unsigned int ItemsPerBlock, - unsigned int ItemsPerThread -> -__global__ -void scatter_to_striped_kernel(Type* device_input, OutputType* device_output, unsigned int* device_ranks) -{ - constexpr unsigned int block_size = (ItemsPerBlock / ItemsPerThread); - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * ItemsPerBlock; - - Type input[ItemsPerThread]; - OutputType output[ItemsPerThread]; - unsigned int ranks[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_input + block_offset, input); - hipcub::LoadDirectBlocked(lid, device_ranks + block_offset, ranks); - - hipcub::BlockExchange exchange; - exchange.ScatterToStriped(input, output, ranks); - - hipcub::StoreDirectBlocked(lid, device_output + block_offset, output); -} - -TYPED_TEST(HipcubBlockExchangeTests, ScatterToStriped) -{ - using type = typename TestFixture::params::type; - using output_type = typename TestFixture::params::output_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 113; - // Generate data - std::vector input(size); - std::vector expected(size); - std::vector output(size, output_type(0)); - std::vector ranks(size); - - // Calculate input and expected results on host - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - auto block_ranks = ranks.begin() + bi * items_per_block; - std::iota(block_ranks, block_ranks + items_per_block, 0); - std::shuffle(block_ranks, block_ranks + items_per_block, std::mt19937{std::random_device{}()}); - } - std::vector values(size); - std::iota(values.begin(), values.end(), 0); - for(size_t bi = 0; bi < size / items_per_block; bi++) - { - for(size_t ti = 0; ti < block_size; ti++) - { - for(size_t ii = 0; ii < items_per_thread; ii++) - { - const size_t offset = bi * items_per_block; - const size_t i0 = offset + ti * items_per_thread + ii; - const size_t i1 = offset - + ranks[i0] % block_size * items_per_thread - + ranks[i0] / block_size; - input[i0] = values[i0]; - expected[i1] = values[i0]; - } - } - } - - // Preparing device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - output_type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - unsigned int* device_ranks; - HIP_CHECK(hipMalloc(&device_ranks, ranks.size() * sizeof(typename decltype(ranks)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_ranks, ranks.data(), - ranks.size() * sizeof(unsigned int), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(scatter_to_striped_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_ranks - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_ranks)); - -} - diff --git a/test/hipcub/test_hipcub_block_histogram.cpp b/test/hipcub/test_hipcub_block_histogram.cpp deleted file mode 100644 index 00a5f87ee..000000000 --- a/test/hipcub/test_hipcub_block_histogram.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include - -// Google Test -#include -// rocPRIM API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(static_cast(error), hipSuccess) - -// Params for tests -template< - class T, - unsigned int BlockSize = 256U, - unsigned int ItemsPerThread = 1U, - unsigned int BinSize = BlockSize, - hipcub::BlockHistogramAlgorithm Algorithm = hipcub::BlockHistogramAlgorithm::BLOCK_HISTO_ATOMIC -> -struct params -{ - using type = T; - static constexpr hipcub::BlockHistogramAlgorithm algorithm = Algorithm; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; - static constexpr unsigned int bin_size = BinSize; -}; - -template -class HipcubBlockHistogramInputArrayTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr unsigned int block_size = Params::block_size; - static constexpr hipcub::BlockHistogramAlgorithm algorithm = Params::algorithm; - static constexpr unsigned int items_per_thread = Params::items_per_thread; - static constexpr unsigned int bin_size = Params::bin_size; -}; - -typedef ::testing::Types< - // ----------------------------------------------------------------------- - // hipcub::BlockHistogramAlgorithm::BLOCK_HISTO_ATOMIC - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - // ----------------------------------------------------------------------- - // hipcub::BlockHistogramAlgorithm::BLOCK_HISTO_SORT - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params -> InputArrayTestParams; - -TYPED_TEST_CASE(HipcubBlockHistogramInputArrayTests, InputArrayTestParams); - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - unsigned int BinSize, - hipcub::BlockHistogramAlgorithm Algorithm, - class T -> -__global__ -void histogram_kernel(T* device_output, T* device_output_bin) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - unsigned int global_offset = hipBlockIdx_x * BinSize; - __shared__ T hist[BinSize]; - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bhistogram_t = hipcub::BlockHistogram; - __shared__ typename bhistogram_t::TempStorage temp_storage; - bhistogram_t(temp_storage).Histogram(in_out, hist); - __syncthreads(); - - #pragma unroll - for (unsigned int offset = 0; offset < BinSize; offset += BlockSize) - { - if(offset + hipThreadIdx_x < BinSize) - { - device_output_bin[global_offset + hipThreadIdx_x] = hist[offset + hipThreadIdx_x]; - global_offset += BlockSize; - } - } -} - -TYPED_TEST(HipcubBlockHistogramInputArrayTests, Histogram) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - constexpr size_t bin = TestFixture::bin_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t bin_sizes = bin * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 0, T(bin - 1)); - - // Output reduce results - std::vector output_bin(bin_sizes, 0); - - // Calculate expected results on host - std::vector expected_bin(output_bin.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - for(size_t j = 0; j < items_per_block; j++) - { - auto bin_idx = i * bin; - auto idx = i * items_per_block + j; - expected_bin[bin_idx + static_cast(output[idx])]++; - } - } - - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_bin; - HIP_CHECK(hipMalloc(&device_output_bin, output_bin.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_bin, output_bin.data(), - output_bin.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(histogram_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bin - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_bin.data(), device_output_bin, - output_bin.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output_bin.size(); i++) - { - ASSERT_EQ( - output_bin[i], expected_bin[i] - ); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bin)); -} diff --git a/test/hipcub/test_hipcub_block_load_store.cpp b/test/hipcub/test_hipcub_block_load_store.cpp deleted file mode 100644 index 419bb9c56..000000000 --- a/test/hipcub/test_hipcub_block_load_store.cpp +++ /dev/null @@ -1,469 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Type, - hipcub::BlockLoadAlgorithm Load, - hipcub::BlockStoreAlgorithm Store, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -struct class_params -{ - using type = Type; - static constexpr hipcub::BlockLoadAlgorithm load_method = Load; - static constexpr hipcub::BlockStoreAlgorithm store_method = Store; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; -}; - -template -class HipcubBlockLoadStoreClassTests : public ::testing::Test { -public: - using params = ClassParams; -}; - -typedef ::testing::Types< - // BLOCK_LOAD_DIRECT - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_DIRECT, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_DIRECT, 64U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_DIRECT, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_DIRECT, 64U, 4>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_DIRECT, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_DIRECT, 256U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_DIRECT, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_DIRECT, 256U, 4>, - - // BLOCK_LOAD_VECTORIZE - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_VECTORIZE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_VECTORIZE, 64U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_VECTORIZE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_VECTORIZE, 64U, 4>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_VECTORIZE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_VECTORIZE, 256U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_VECTORIZE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_VECTORIZE, 256U, 4>, - - // BLOCK_LOAD_TRANSPOSE - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, - class_params, - class_params, - class_params, - class_params, - class_params, - - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_TRANSPOSE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_TRANSPOSE, 64U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_TRANSPOSE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_TRANSPOSE, 64U, 4>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_TRANSPOSE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_TRANSPOSE, 256U, 1>, - class_params, hipcub::BlockLoadAlgorithm::BLOCK_LOAD_TRANSPOSE, - hipcub::BlockStoreAlgorithm::BLOCK_STORE_TRANSPOSE, 256U, 4> - -> ClassParams; - -TYPED_TEST_CASE(HipcubBlockLoadStoreClassTests, ClassParams); - -template< - class Type, - hipcub::BlockLoadAlgorithm LoadMethod, - hipcub::BlockStoreAlgorithm StoreMethod, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void load_store_kernel(Type* device_input, Type* device_output) -{ - Type items[ItemsPerThread]; - unsigned int offset = hipBlockIdx_x * BlockSize * ItemsPerThread; - hipcub::BlockLoad load; - hipcub::BlockStore store; - load.Load(device_input + offset, items); - store.Store(device_output + offset, items); -} - -TYPED_TEST(HipcubBlockLoadStoreClassTests, LoadStoreClass) -{ - using Type = typename TestFixture::params::type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr hipcub::BlockLoadAlgorithm load_method = TestFixture::params::load_method; - constexpr hipcub::BlockStoreAlgorithm store_method = TestFixture::params::store_method; - const size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr auto items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 113; - const auto grid_size = size / items_per_block; - // Given block size not supported - if(block_size > test_utils::get_max_block_size() || (block_size & (block_size - 1)) != 0) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), 0); - for (size_t i = 0; i < 113; i++) - { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) - { - expected[j + block_offset] = input[j + block_offset]; - } - } - - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - hipcub::BlockLoadAlgorithm LoadMethod, - hipcub::BlockStoreAlgorithm StoreMethod, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void load_store_valid_kernel(Type* device_input, Type* device_output, size_t valid) -{ - Type items[ItemsPerThread]; - unsigned int offset = hipBlockIdx_x * BlockSize * ItemsPerThread; - hipcub::BlockLoad load; - hipcub::BlockStore store; - load.Load(device_input + offset, items, valid); - store.Store(device_output + offset, items, valid); -} - -TYPED_TEST(HipcubBlockLoadStoreClassTests, LoadStoreClassValid) -{ - using Type = typename TestFixture::params::type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr hipcub::BlockLoadAlgorithm load_method = TestFixture::params::load_method; - constexpr hipcub::BlockStoreAlgorithm store_method = TestFixture::params::store_method; - const size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr auto items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 113; - const auto grid_size = size / items_per_block; - // Given block size not supported - if(block_size > test_utils::get_max_block_size() || (block_size & (block_size - 1)) != 0) - { - return; - } - - const size_t valid = items_per_block - 32; - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), 0); - for (size_t i = 0; i < 113; i++) - { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) - { - if (j < valid) - { - expected[j + block_offset] = input[j + block_offset]; - } - } - } - - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Have to initialize output for unvalid data to make sure they are not changed - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_valid_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, valid - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class Type, - hipcub::BlockLoadAlgorithm LoadMethod, - hipcub::BlockStoreAlgorithm StoreMethod, - unsigned int BlockSize, - unsigned int ItemsPerThread -> -__global__ -void load_store_valid_default_kernel(Type* device_input, Type* device_output, size_t valid, int _default) -{ - Type items[ItemsPerThread]; - unsigned int offset = hipBlockIdx_x * BlockSize * ItemsPerThread; - hipcub::BlockLoad load; - hipcub::BlockStore store; - load.Load(device_input + offset, items, valid, _default); - store.Store(device_output + offset, items); -} - -TYPED_TEST(HipcubBlockLoadStoreClassTests, LoadStoreClassDefault) -{ - using Type = typename TestFixture::params::type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr hipcub::BlockLoadAlgorithm load_method = TestFixture::params::load_method; - constexpr hipcub::BlockStoreAlgorithm store_method = TestFixture::params::store_method; - const size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr auto items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 113; - const auto grid_size = size / items_per_block; - // Given block size not supported - if(block_size > test_utils::get_max_block_size() || (block_size & (block_size - 1)) != 0) - { - return; - } - - const size_t valid = items_per_thread + 1; - int _default = -1; - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), _default); - for (size_t i = 0; i < 113; i++) - { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) - { - if (j < valid) - { - expected[j + block_offset] = input[j + block_offset]; - } - } - } - - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_valid_default_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, valid, _default - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} diff --git a/test/hipcub/test_hipcub_block_radix_sort.cpp b/test/hipcub/test_hipcub_block_radix_sort.cpp deleted file mode 100644 index e3bc741b1..000000000 --- a/test/hipcub/test_hipcub_block_radix_sort.cpp +++ /dev/null @@ -1,443 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Key, - class Value, - unsigned int BlockSize, - unsigned int ItemsPerThread, - bool Descending = false, - bool ToStriped = false, - unsigned int StartBit = 0, - unsigned int EndBit = sizeof(Key) * 8 -> -struct params -{ - using key_type = Key; - using value_type = Value; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; - static constexpr bool descending = Descending; - static constexpr bool to_striped = ToStriped; - static constexpr unsigned int start_bit = StartBit; - static constexpr unsigned int end_bit = EndBit; -}; - -template -class HipcubBlockRadixSort : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - // Power of 2 BlockSize - params, - params, - params, - params, - - // Non-power of 2 BlockSize - params, - params, - params, - params, - params, - - // Power of 2 BlockSize and ItemsPerThread > 1 - params, - params, - params, - - // Non-power of 2 BlockSize and ItemsPerThread > 1 - params, - params, - params, - params, - - // StartBit and EndBit - params, - params, - params, - - // Stability (a number of key values is lower than BlockSize * ItemsPerThread: some keys appear - // multiple times with different values or key parts outside [StartBit, EndBit)) - params, - params -> Params; - -TYPED_TEST_CASE(HipcubBlockRadixSort, Params); - -template -struct key_comparator -{ -private: - template - constexpr static bool all_bits() - { - return (CStartBit == 0 && CEndBit == sizeof(Key) * 8); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - return Descending ? (rhs < lhs) : (lhs < rhs); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - auto mask = (1ull << (EndBit - StartBit)) - 1; - auto l = (static_cast(lhs) >> StartBit) & mask; - auto r = (static_cast(rhs) >> StartBit) & mask; - return Descending ? (r < l) : (l < r); - } - -public: - static_assert( - key_comparator::all_bits() || std::is_unsigned::value, - "Test supports start and end bits only for unsigned integers" - ); - - bool operator()(const Key& lhs, const Key& rhs) - { - return this->compare(lhs, rhs); - } -}; - -template -struct key_value_comparator -{ - bool operator()(const std::pair& lhs, const std::pair& rhs) - { - return key_comparator()(lhs.first, rhs.first); - } -}; - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - class key_type -> -__global__ -void sort_key_kernel( - key_type* device_keys_output, - bool to_striped, - bool descending, - unsigned int start_bit, - unsigned int end_bit) -{ - constexpr unsigned int items_per_block = BlockSize * ItemsPerThread; - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * items_per_block; - - key_type keys[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_keys_output + block_offset, keys); - - hipcub::BlockRadixSort bsort; - if(to_striped) - { - if(descending) - bsort.SortDescendingBlockedToStriped(keys, start_bit, end_bit); - else - bsort.SortBlockedToStriped(keys, start_bit, end_bit); - - hipcub::StoreDirectStriped(lid, device_keys_output + block_offset, keys); - } - else - { - if(descending) - bsort.SortDescending(keys, start_bit, end_bit); - else - bsort.Sort(keys, start_bit, end_bit); - - hipcub::StoreDirectBlocked(lid, device_keys_output + block_offset, keys); - } -} - -TYPED_TEST(HipcubBlockRadixSort, SortKeys) -{ - using key_type = typename TestFixture::params::key_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr bool descending = TestFixture::params::descending; - constexpr bool to_striped = TestFixture::params::to_striped; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 1134; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector keys_output; - if(std::is_floating_point::value) - { - keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_output = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - // Calculate expected results on host - std::vector expected(keys_output); - for(size_t i = 0; i < size / items_per_block; i++) - { - std::stable_sort( - expected.begin() + (i * items_per_block), - expected.begin() + ((i + 1) * items_per_block), - key_comparator() - ); - } - - // Preparing device - key_type* device_keys_output; - HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); - - HIP_CHECK( - hipMemcpy( - device_keys_output, keys_output.data(), - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_keys_output, to_striped, descending, start_bit, end_bit - ); - - // Getting results to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), device_keys_output, - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_keys_output)); -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - class key_type, - class value_type -> -__global__ -void sort_key_value_kernel( - key_type* device_keys_output, - value_type* device_values_output, - bool to_striped, - bool descending, - unsigned int start_bit, - unsigned int end_bit) -{ - constexpr unsigned int items_per_block = BlockSize * ItemsPerThread; - const unsigned int lid = hipThreadIdx_x; - const unsigned int block_offset = hipBlockIdx_x * items_per_block; - - key_type keys[ItemsPerThread]; - value_type values[ItemsPerThread]; - hipcub::LoadDirectBlocked(lid, device_keys_output + block_offset, keys); - hipcub::LoadDirectBlocked(lid, device_values_output + block_offset, values); - - hipcub::BlockRadixSort bsort; - if(to_striped) - { - if(descending) - bsort.SortDescendingBlockedToStriped(keys, values, start_bit, end_bit); - else - bsort.SortBlockedToStriped(keys, values, start_bit, end_bit); - - hipcub::StoreDirectStriped(lid, device_keys_output + block_offset, keys); - hipcub::StoreDirectStriped(lid, device_values_output + block_offset, values); - } - else - { - if(descending) - bsort.SortDescending(keys, values, start_bit, end_bit); - else - bsort.Sort(keys, values, start_bit, end_bit); - - hipcub::StoreDirectBlocked(lid, device_keys_output + block_offset, keys); - hipcub::StoreDirectBlocked(lid, device_values_output + block_offset, values); - } -} - - -TYPED_TEST(HipcubBlockRadixSort, SortKeysValues) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - constexpr size_t block_size = TestFixture::params::block_size; - constexpr size_t items_per_thread = TestFixture::params::items_per_thread; - constexpr bool descending = TestFixture::params::descending; - constexpr bool to_striped = TestFixture::params::to_striped; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr size_t items_per_block = block_size * items_per_thread; - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = items_per_block * 1134; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector keys_output; - if(std::is_floating_point::value) - { - keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_output = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector values_output; - if(std::is_floating_point::value) - { - values_output = test_utils::get_random_data(size, (value_type)-1000, (value_type)+1000); - } - else - { - values_output = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_output[i], values_output[i]); - } - - for(size_t i = 0; i < size / items_per_block; i++) - { - std::stable_sort( - expected.begin() + (i * items_per_block), - expected.begin() + ((i + 1) * items_per_block), - key_value_comparator() - ); - } - - key_type* device_keys_output; - HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); - value_type* device_values_output; - HIP_CHECK(hipMalloc(&device_values_output, values_output.size() * sizeof(value_type))); - - HIP_CHECK( - hipMemcpy( - device_keys_output, keys_output.data(), - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_values_output, values_output.data(), - values_output.size() * sizeof(typename decltype(values_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_value_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_keys_output, device_values_output, to_striped, descending, start_bit, end_bit - ); - - // Getting results to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), device_keys_output, - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - values_output.data(), device_values_output, - values_output.size() * sizeof(typename decltype(values_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i].first); - ASSERT_EQ(values_output[i], expected[i].second); - } - - HIP_CHECK(hipFree(device_keys_output)); - HIP_CHECK(hipFree(device_values_output)); -} - diff --git a/test/hipcub/test_hipcub_block_reduce.cpp b/test/hipcub/test_hipcub_block_reduce.cpp deleted file mode 100644 index c75ffd862..000000000 --- a/test/hipcub/test_hipcub_block_reduce.cpp +++ /dev/null @@ -1,442 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Params for tests -template< - class T, - unsigned int BlockSize = 256U, - unsigned int ItemsPerThread = 1U, - hipcub::BlockReduceAlgorithm Algorithm = hipcub::BlockReduceAlgorithm::BLOCK_REDUCE_WARP_REDUCTIONS -> -struct params -{ - using type = T; - static constexpr hipcub::BlockReduceAlgorithm algorithm = Algorithm; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; -}; - -// --------------------------------------------------------- -// Test for reduce ops taking single input value -// --------------------------------------------------------- - -template -class HipcubBlockReduceSingleValueTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr hipcub::BlockReduceAlgorithm algorithm = Params::algorithm; - static constexpr unsigned int block_size = Params::block_size; -}; - -typedef ::testing::Types< - // ----------------------------------------------------------------------- - // hipcub::BlockReduceAlgorithm::BLOCK_REDUCE_WARP_REDUCTIONS - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - // uint tests - params, - params, - params, - // long tests - params, - params, - params, - // ----------------------------------------------------------------------- - // hipcub::BlockReduceAlgorithm::BLOCK_REDUCE_RAKING - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params -> SingleValueTestParams; - -TYPED_TEST_CASE(HipcubBlockReduceSingleValueTests, SingleValueTestParams); - -template< - unsigned int BlockSize, - hipcub::BlockReduceAlgorithm Algorithm, - class T -> -__global__ -void reduce_kernel(T* device_output, T* device_output_reductions) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - using breduce_t = hipcub::BlockReduce; - __shared__ typename breduce_t::TempStorage temp_storage; - value = breduce_t(temp_storage).Reduce(value, hipcub::Sum()); - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = value; - } -} - -TYPED_TEST(HipcubBlockReduceSingleValueTests, Reduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - T value = 0; - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - value += output[idx]; - } - expected_reductions[i] = value; - } - - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -TYPED_TEST_CASE(HipcubBlockReduceSingleValueTests, SingleValueTestParams); - -template< - unsigned int BlockSize, - hipcub::BlockReduceAlgorithm Algorithm, - class T -> -__global__ -void reduce_valid_kernel(T* device_output, T* device_output_reductions, const unsigned int valid_items) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - using breduce_t = hipcub::BlockReduce; - __shared__ typename breduce_t::TempStorage temp_storage; - value = breduce_t(temp_storage).Reduce(value, hipcub::Sum(), valid_items); - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = value; - } -} - -TYPED_TEST(HipcubBlockReduceSingleValueTests, ReduceValid) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - const unsigned int valid_items = test_utils::get_random_value(block_size - 10, block_size); - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - T value = 0; - for(size_t j = 0; j < valid_items; j++) - { - auto idx = i * block_size + j; - value += output[idx]; - } - expected_reductions[i] = value; - } - - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_valid_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions, valid_items - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - - -template -class HipcubBlockReduceInputArrayTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr unsigned int block_size = Params::block_size; - static constexpr hipcub::BlockReduceAlgorithm algorithm = Params::algorithm; - static constexpr unsigned int items_per_thread = Params::items_per_thread; -}; - -typedef ::testing::Types< - // ----------------------------------------------------------------------- - // hipcub::BlockReduceAlgorithm::BLOCK_REDUCE_WARP_REDUCTIONS - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - // ----------------------------------------------------------------------- - // hipcub::BlockReduceAlgorithm::BLOCK_REDUCE_RAKING - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params -> InputArrayTestParams; - -TYPED_TEST_CASE(HipcubBlockReduceInputArrayTests, InputArrayTestParams); - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockReduceAlgorithm Algorithm, - class T -> -__global__ -void reduce_array_kernel(T* device_output, T* device_output_reductions) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - T reduction; - using breduce_t = hipcub::BlockReduce; - __shared__ typename breduce_t::TempStorage temp_storage; - reduction = breduce_t(temp_storage).Reduce(in_out, hipcub::Sum()); - - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = reduction; - } -} - - -TYPED_TEST(HipcubBlockReduceInputArrayTests, Reduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - - // Output reduce results - std::vector output_reductions(size / block_size, 0); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - T value = 0; - for(size_t j = 0; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - value += output[idx]; - } - expected_reductions[i] = value; - } - - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_reductions, output_reductions.data(), - output_reductions.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_NEAR( - output_reductions[i], expected_reductions[i], - static_cast(0.05) * expected_reductions[i] - ); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - diff --git a/test/hipcub/test_hipcub_block_scan.cpp b/test/hipcub/test_hipcub_block_scan.cpp deleted file mode 100644 index 661446478..000000000 --- a/test/hipcub/test_hipcub_block_scan.cpp +++ /dev/null @@ -1,1677 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Params for tests -template< - class T, - unsigned int BlockSize = 256U, - unsigned int ItemsPerThread = 1U, - hipcub::BlockScanAlgorithm Algorithm = hipcub::BLOCK_SCAN_WARP_SCANS -> -struct params -{ - using type = T; - static constexpr hipcub::BlockScanAlgorithm algorithm = Algorithm; - static constexpr unsigned int block_size = BlockSize; - static constexpr unsigned int items_per_thread = ItemsPerThread; -}; - -// --------------------------------------------------------- -// Test for scan ops taking single input value -// --------------------------------------------------------- - -template -class HipcubBlockScanSingleValueTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr hipcub::BlockScanAlgorithm algorithm = Params::algorithm; - static constexpr unsigned int block_size = Params::block_size; -}; - -typedef ::testing::Types< - // ----------------------------------------------------------------------- - // hipcub::BLOCK_SCAN_WARP_SCANS - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - // uint tests - params, - params, - params, - // long tests - params, - params, - params, - // ----------------------------------------------------------------------- - // hipcub::BLOCK_SCAN_RAKING - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - params, - params -> SingleValueTestParams; - -TYPED_TEST_CASE(HipcubBlockScanSingleValueTests, SingleValueTestParams); - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_kernel(T* device_output) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).InclusiveScan(value, value, hipcub::Sum()); - - device_output[index] = value; -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, InclusiveScan) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_output)); -} - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_reduce_kernel(T* device_output, T* device_output_reductions) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - T reduction; - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).InclusiveScan(value, value, hipcub::Sum(), reduction); - device_output[index] = value; - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = reduction; - } -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, InclusiveScanReduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - expected_reductions[i] = expected[(i+1) * block_size - 1]; - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_prefix_callback_kernel(T* device_output, T* device_output_bp, T block_prefix) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T prefix_value = block_prefix; - auto prefix_callback = [&prefix_value](T reduction) - { - T prefix = prefix_value; - prefix_value += reduction; - return prefix; - }; - - T value = device_output[index]; - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).InclusiveScan(value, value, hipcub::Sum(), prefix_callback); - - device_output[index] = value; - if(hipThreadIdx_x == 0) - { - device_output_bp[hipBlockIdx_x] = prefix_value; - } -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, InclusiveScanPrefixCallback) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_block_prefixes(size / block_size); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - expected[i * block_size] = block_prefix; - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - expected_block_prefixes[i] = expected[(i+1) * block_size - 1]; - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_prefix_callback_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_block_prefixes.size(); i++) - { - ASSERT_EQ(output_block_prefixes[i], expected_block_prefixes[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); -} - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_kernel(T* device_output, T init) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).ExclusiveScan(value, value, init, hipcub::Sum()); - device_output[index] = value; -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, ExclusiveScan) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 241); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - expected[i * block_size] = init; - for(size_t j = 1; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_output)); -} - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_reduce_kernel(T* device_output, T* device_output_reductions, T init) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T value = device_output[index]; - T reduction; - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).ExclusiveScan(value, value, init, hipcub::Sum(), reduction); - device_output[index] = value; - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = reduction; - } -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, ExclusiveScanReduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - const T init = test_utils::get_random_value(0, 100); - - // Output reduce results - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - expected[i * block_size] = init; - for(size_t j = 1; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - - expected_reductions[i] = 0; - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected_reductions[i] += output[idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -template< - unsigned int BlockSize, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_prefix_callback_kernel(T* device_output, T* device_output_bp, T block_prefix) -{ - const unsigned int index = (hipBlockIdx_x * BlockSize) + hipThreadIdx_x; - T prefix_value = block_prefix; - auto prefix_callback = [&prefix_value](T reduction) - { - T prefix = prefix_value; - prefix_value += reduction; - return prefix; - }; - - T value = device_output[index]; - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).ExclusiveScan(value, value, hipcub::Sum(), prefix_callback); - - device_output[index] = value; - if(hipThreadIdx_x == 0) - { - device_output_bp[hipBlockIdx_x] = prefix_value; - } -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, ExclusiveScanPrefixCallback) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_block_prefixes(size / block_size); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - for(size_t i = 0; i < output.size() / block_size; i++) - { - expected[i * block_size] = block_prefix; - for(size_t j = 1; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - - expected_block_prefixes[i] = block_prefix; - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected_block_prefixes[i] += output[idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_prefix_callback_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_block_prefixes.size(); i++) - { - ASSERT_EQ(output_block_prefixes[i], expected_block_prefixes[i]); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); -} - -TYPED_TEST(HipcubBlockScanSingleValueTests, CustomStruct) -{ - using base_type = typename TestFixture::type; - using T = test_utils::custom_test_type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t size = block_size * 113; - const size_t grid_size = size / block_size; - // Generate data - std::vector output(size); - { - std::vector random_values = - test_utils::get_random_data(2 * output.size(), 2, 200); - for(size_t i = 0; i < output.size(); i++) - { - output[i].x = random_values[i], - output[i].y = random_values[i + output.size()]; - } - } - - // Calculate expected results on host - std::vector expected(output.size(), T(0)); - for(size_t i = 0; i < output.size() / block_size; i++) - { - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - HIP_CHECK(hipFree(device_output)); -} - -// // --------------------------------------------------------- -// // Test for scan ops taking array of values as input -// // --------------------------------------------------------- - -template -class HipcubBlockScanInputArrayTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr unsigned int block_size = Params::block_size; - static constexpr hipcub::BlockScanAlgorithm algorithm = Params::algorithm; - static constexpr unsigned int items_per_thread = Params::items_per_thread; -}; - -typedef ::testing::Types< - // ----------------------------------------------------------------------- - // hipcub::BlockScanAlgorithm::using_warp_scan - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params, - // ----------------------------------------------------------------------- - // hipcub::BLOCK_SCAN_RAKING - // ----------------------------------------------------------------------- - params, - params, - params, - params, - params, - params, - params, - params -> InputArrayTestParams; - -TYPED_TEST_CASE(HipcubBlockScanInputArrayTests, InputArrayTestParams); - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_array_kernel(T* device_output) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize ) + hipThreadIdx_x) * ItemsPerThread; - - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).InclusiveScan(in_out, in_out, hipcub::Sum()); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } - -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, InclusiveScan) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - for(size_t j = 0; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - HIP_CHECK(hipFree(device_output)); -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_reduce_array_kernel(T* device_output, T* device_output_reductions) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize ) + hipThreadIdx_x) * ItemsPerThread; - - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - T reduction; - bscan_t(temp_storage).InclusiveScan(in_out, in_out, hipcub::Sum(), reduction); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } - - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = reduction; - } -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, InclusiveScanReduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - - // Output reduce results - std::vector output_reductions(size / block_size, 0); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - for(size_t j = 0; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - expected_reductions[i] = expected[(i+1) * items_per_block - 1]; - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_reductions, output_reductions.data(), - output_reductions.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_NEAR( - output_reductions[i], expected_reductions[i], - static_cast(0.05) * expected_reductions[i] - ); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void inclusive_scan_array_prefix_callback_kernel(T* device_output, T* device_output_bp, T block_prefix) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - T prefix_value = block_prefix; - auto prefix_callback = [&prefix_value](T reduction) - { - T prefix = prefix_value; - prefix_value += reduction; - return prefix; - }; - - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).InclusiveScan(in_out, in_out, hipcub::Sum(), prefix_callback); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } - - if(hipThreadIdx_x == 0) - { - device_output_bp[hipBlockIdx_x] = prefix_value; - } -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, InclusiveScanPrefixCallback) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_block_prefixes(size / items_per_block, 0); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - expected[i * items_per_block] = block_prefix; - for(size_t j = 0; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx] + expected[j > 0 ? idx-1 : idx]; - } - expected_block_prefixes[i] = expected[(i+1) * items_per_block - 1]; - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_bp, output_block_prefixes.data(), - output_block_prefixes.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - inclusive_scan_array_prefix_callback_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - for(size_t i = 0; i < output_block_prefixes.size(); i++) - { - ASSERT_NEAR( - output_block_prefixes[i], expected_block_prefixes[i], - static_cast(0.05) * expected_block_prefixes[i] - ); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_array_kernel(T* device_output, T init) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).ExclusiveScan(in_out, in_out, init, hipcub::Sum()); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, ExclusiveScan) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - expected[i * items_per_block] = init; - for(size_t j = 1; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - HIP_CHECK(hipFree(device_output)); -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_reduce_array_kernel(T* device_output, T* device_output_reductions, T init) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index + j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - T reduction; - bscan_t(temp_storage).ExclusiveScan(in_out, in_out, init, hipcub::Sum(), reduction); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } - - if(hipThreadIdx_x == 0) - { - device_output_reductions[hipBlockIdx_x] = reduction; - } -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, ExclusiveScanReduce) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - - // Output reduce results - std::vector output_reductions(size / block_size); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - expected[i * items_per_block] = init; - for(size_t j = 1; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - for(size_t j = 0; j < items_per_block; j++) - { - expected_reductions[i] += output[i * items_per_block + j]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - exclusive_scan_reduce_array_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_NEAR( - output_reductions[i], expected_reductions[i], - static_cast(0.05) * expected_reductions[i] - ); - } -} - -template< - unsigned int BlockSize, - unsigned int ItemsPerThread, - hipcub::BlockScanAlgorithm Algorithm, - class T -> -__global__ -void exclusive_scan_prefix_callback_array_kernel( - T* device_output, - T* device_output_bp, - T block_prefix -) -{ - const unsigned int index = ((hipBlockIdx_x * BlockSize) + hipThreadIdx_x) * ItemsPerThread; - T prefix_value = block_prefix; - auto prefix_callback = [&prefix_value](T reduction) - { - T prefix = prefix_value; - prefix_value += reduction; - return prefix; - }; - - // load - T in_out[ItemsPerThread]; - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - in_out[j] = device_output[index+ j]; - } - - using bscan_t = hipcub::BlockScan; - __shared__ typename bscan_t::TempStorage temp_storage; - bscan_t(temp_storage).ExclusiveScan(in_out, in_out, hipcub::Sum(), prefix_callback); - - // store - for(unsigned int j = 0; j < ItemsPerThread; j++) - { - device_output[index + j] = in_out[j]; - } - - if(hipThreadIdx_x == 0) - { - device_output_bp[hipBlockIdx_x] = prefix_value; - } -} - -TYPED_TEST(HipcubBlockScanInputArrayTests, ExclusiveScanPrefixCallback) -{ - using T = typename TestFixture::type; - constexpr auto algorithm = TestFixture::algorithm; - constexpr size_t block_size = TestFixture::block_size; - constexpr size_t items_per_thread = TestFixture::items_per_thread; - - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) - { - return; - } - - const size_t items_per_block = block_size * items_per_thread; - const size_t size = items_per_block * 37; - const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 200); - std::vector output_block_prefixes(size / items_per_block); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) - { - expected[i * items_per_block] = block_prefix; - for(size_t j = 1; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = output[idx-1] + expected[idx-1]; - } - expected_block_prefixes[i] = block_prefix; - for(size_t j = 0; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected_block_prefixes[i] += output[idx]; - } - } - - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - exclusive_scan_prefix_callback_array_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_NEAR( - output[i], expected[i], - static_cast(0.05) * expected[i] - ); - } - - for(size_t i = 0; i < output_block_prefixes.size(); i++) - { - ASSERT_NEAR( - output_block_prefixes[i], expected_block_prefixes[i], - static_cast(0.05) * expected_block_prefixes[i] - ); - } - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); -} - diff --git a/test/hipcub/test_hipcub_device_histogram.cpp b/test/hipcub/test_hipcub_device_histogram.cpp deleted file mode 100644 index 32ad18694..000000000 --- a/test/hipcub/test_hipcub_device_histogram.cpp +++ /dev/null @@ -1,1002 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// CUB's implementation of DeviceHistogram has unused parameters, -// disable the warning because all warnings are threated as errors: -#ifdef __HIP_PLATFORM_NVCC__ - #pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -#include -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// rows, columns, (row_stride - columns * Channels) -std::vector> get_dims() -{ - std::vector> sizes = { - // Empty - std::make_tuple(0, 0, 0), - std::make_tuple(1, 0, 0), - std::make_tuple(0, 1, 0), - // Linear - std::make_tuple(1, 1, 0), - std::make_tuple(1, 53, 0), - std::make_tuple(1, 5096, 0), - std::make_tuple(1, 34567, 0), - std::make_tuple(1, (1 << 18) - 1220, 0), - // Strided - std::make_tuple(2, 1, 0), - std::make_tuple(10, 10, 11), - std::make_tuple(111, 111, 111), - std::make_tuple(128, 1289, 0), - std::make_tuple(12, 1000, 24), - std::make_tuple(123, 3000, 121), - std::make_tuple(1024, 512, 0), - std::make_tuple(2345, 49, 2), - std::make_tuple(17867, 41, 13), - }; - return sizes; -} - -// Generate values ouside the desired histogram range (+-10%) -// (correctly handling test cases like uchar [0, 256), ushort [0, 65536)) -template -inline auto get_random_samples(size_t size, U min, U max) - -> typename std::enable_if::value, std::vector>::type -{ - const long long min1 = static_cast(min); - const long long max1 = static_cast(max); - const long long d = max1 - min1; - return test_utils::get_random_data( - size, - static_cast(std::max(min1 - d / 10, static_cast(std::numeric_limits::lowest()))), - static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))) - ); -} - -template -inline auto get_random_samples(size_t size, U min, U max) - -> typename std::enable_if::value, std::vector>::type -{ - const double min1 = static_cast(min); - const double max1 = static_cast(max); - const double d = max1 - min1; - return test_utils::get_random_data( - size, - static_cast(std::max(min1 - d / 10, static_cast(std::numeric_limits::lowest()))), - static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))) - ); -} - -// Does nothing, used for testing iterators (not raw pointers) as samples input -template -struct transform_op -{ - __host__ __device__ inline - T operator()(T x) const - { - return x * 1; - } -}; - -template< - class SampleType, - unsigned int Bins, - int LowerLevel, - int UpperLevel, - class LevelType = SampleType, - class CounterType = int -> -struct params1 -{ - using sample_type = SampleType; - static constexpr unsigned int bins = Bins; - static constexpr int lower_level = LowerLevel; - static constexpr int upper_level = UpperLevel; - using level_type = LevelType; - using counter_type = CounterType; -}; - -template -class HipcubDeviceHistogramEven : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params1, - params1, - params1, - params1, - params1, - params1, - - params1, - params1, - params1 -> Params1; - -TYPED_TEST_CASE(HipcubDeviceHistogramEven, Params1); - -TYPED_TEST(HipcubDeviceHistogramEven, Even) -{ - using sample_type = typename TestFixture::params::sample_type; - using counter_type = typename TestFixture::params::counter_type; - using level_type = typename TestFixture::params::level_type; - constexpr unsigned int bins = TestFixture::params::bins; - constexpr level_type lower_level = TestFixture::params::lower_level; - constexpr level_type upper_level = TestFixture::params::upper_level; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - for(auto dim : get_dims()) - { - SCOPED_TRACE( - testing::Message() << "with dim = {" << - std::get<0>(dim) << ", " << std::get<1>(dim) << ", " << std::get<2>(dim) << "}" - ); - - const size_t rows = std::get<0>(dim); - const size_t columns = std::get<1>(dim); - const size_t row_stride = columns + std::get<2>(dim); - - const size_t row_stride_bytes = row_stride * sizeof(sample_type); - const size_t size = std::max(1, rows * row_stride); - - // Generate data - std::vector input = get_random_samples(size, lower_level, upper_level); - - sample_type * d_input; - counter_type * d_histogram; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector histogram_expected(bins, 0); - const level_type scale = (upper_level - lower_level) / bins; - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) - { - const sample_type sample = input[row * row_stride + column]; - const level_type s = static_cast(sample); - if(s >= lower_level && s < upper_level) - { - const int bin = (s - lower_level) / scale; - histogram_expected[bin]++; - } - } - } - - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramEven( - nullptr, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, lower_level, upper_level, - int(columns), - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramEven( - nullptr, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, lower_level, upper_level, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(rows == 1) - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramEven( - d_temporary_storage, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, lower_level, upper_level, - int(columns), - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramEven( - d_temporary_storage, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, lower_level, upper_level, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - std::vector histogram(bins); - HIP_CHECK( - hipMemcpy( - histogram.data(), d_histogram, - bins * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_histogram)); - - for(size_t i = 0; i < bins; i++) - { - ASSERT_EQ(histogram[i], histogram_expected[i]); - } - } -} - -template< - class SampleType, - unsigned int Bins, - int StartLevel = 0, - unsigned int MinBinWidth = 1, - unsigned int MaxBinWidth = 10, - class LevelType = SampleType, - class CounterType = int -> -struct params2 -{ - using sample_type = SampleType; - static constexpr unsigned int bins = Bins; - static constexpr int start_level = StartLevel; - static constexpr unsigned int min_bin_length = MinBinWidth; - static constexpr unsigned int max_bin_length = MaxBinWidth; - using level_type = LevelType; - using counter_type = CounterType; -}; - -template -class HipcubDeviceHistogramRange : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params2, - params2, - params2, - params2, - params2, - - params2, - params2 -> Params2; - -TYPED_TEST_CASE(HipcubDeviceHistogramRange, Params2); - -TYPED_TEST(HipcubDeviceHistogramRange, Range) -{ - using sample_type = typename TestFixture::params::sample_type; - using counter_type = typename TestFixture::params::counter_type; - using level_type = typename TestFixture::params::level_type; - constexpr unsigned int bins = TestFixture::params::bins; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution bin_length_dis( - TestFixture::params::min_bin_length, - TestFixture::params::max_bin_length - ); - - for(auto dim : get_dims()) - { - SCOPED_TRACE( - testing::Message() << "with dim = {" << - std::get<0>(dim) << ", " << std::get<1>(dim) << ", " << std::get<2>(dim) << "}" - ); - - const size_t rows = std::get<0>(dim); - const size_t columns = std::get<1>(dim); - const size_t row_stride = columns + std::get<2>(dim); - - const size_t row_stride_bytes = row_stride * sizeof(sample_type); - const size_t size = std::max(1, rows * row_stride); - - // Generate data - std::vector levels; - level_type level = TestFixture::params::start_level; - for(unsigned int bin = 0 ; bin < bins; bin++) - { - levels.push_back(level); - level += bin_length_dis(gen); - } - levels.push_back(level); - - std::vector input = get_random_samples(size, levels[0], levels[bins]); - - sample_type * d_input; - level_type * d_levels; - counter_type * d_histogram; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - HIP_CHECK(hipMalloc(&d_levels, (bins + 1) * sizeof(level_type))); - HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_levels, levels.data(), - (bins + 1) * sizeof(level_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector histogram_expected(bins, 0); - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) - { - const sample_type sample = input[row * row_stride + column]; - const level_type s = static_cast(sample); - if(s >= levels[0] && s < levels[bins]) - { - const auto bin_iter = std::upper_bound(levels.begin(), levels.end(), s); - histogram_expected[bin_iter - levels.begin() - 1]++; - } - } - } - - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramRange( - nullptr, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, d_levels, - int(columns), - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramRange( - nullptr, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, d_levels, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(rows == 1) - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramRange( - d_temporary_storage, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, d_levels, - int(columns), - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceHistogram::HistogramRange( - d_temporary_storage, temporary_storage_bytes, - d_input, d_histogram, - bins + 1, d_levels, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - ); - } - - std::vector histogram(bins); - HIP_CHECK( - hipMemcpy( - histogram.data(), d_histogram, - bins * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_levels)); - HIP_CHECK(hipFree(d_histogram)); - - for(size_t i = 0; i < bins; i++) - { - ASSERT_EQ(histogram[i], histogram_expected[i]); - } - } -} - -template< - class SampleType, - unsigned int Channels, - unsigned int ActiveChannels, - unsigned int Bins, - int LowerLevel, - int UpperLevel, - class LevelType = SampleType, - class CounterType = int -> -struct params3 -{ - using sample_type = SampleType; - static constexpr unsigned int channels = Channels; - static constexpr unsigned int active_channels = ActiveChannels; - static constexpr unsigned int bins = Bins; - static constexpr int lower_level = LowerLevel; - static constexpr int upper_level = UpperLevel; - using level_type = LevelType; - using counter_type = CounterType; -}; - -template -class HipcubDeviceHistogramMultiEven : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params3, - params3, - params3, - params3, - params3, - params3, - params3, - - params3, - params3, - params3 -> Params3; - -TYPED_TEST_CASE(HipcubDeviceHistogramMultiEven, Params3); - -TYPED_TEST(HipcubDeviceHistogramMultiEven, MultiEven) -{ - using sample_type = typename TestFixture::params::sample_type; - using counter_type = typename TestFixture::params::counter_type; - using level_type = typename TestFixture::params::level_type; - constexpr unsigned int channels = TestFixture::params::channels; - constexpr unsigned int active_channels = TestFixture::params::active_channels; - - unsigned int bins[active_channels]; - int num_levels[active_channels]; - level_type lower_level[active_channels]; - level_type upper_level[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - // Use different ranges for different channels - constexpr level_type d = TestFixture::params::upper_level - TestFixture::params::lower_level; - const level_type scale = d / TestFixture::params::bins; - lower_level[channel] = TestFixture::params::lower_level + channel * d / 9; - upper_level[channel] = TestFixture::params::upper_level - channel * d / 7; - bins[channel] = (upper_level[channel] - lower_level[channel]) / scale; - upper_level[channel] = lower_level[channel] + bins[channel] * scale; - num_levels[channel] = bins[channel] + 1; - } - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - for(auto dim : get_dims()) - { - SCOPED_TRACE( - testing::Message() << "with dim = {" << - std::get<0>(dim) << ", " << std::get<1>(dim) << ", " << std::get<2>(dim) << "}" - ); - - const size_t rows = std::get<0>(dim); - const size_t columns = std::get<1>(dim); - const size_t row_stride = columns * channels + std::get<2>(dim); - - const size_t row_stride_bytes = row_stride * sizeof(sample_type); - const size_t size = std::max(1, rows * row_stride); - - // Generate data - std::vector input(size); - for(unsigned int channel = 0; channel < channels; channel++) - { - const size_t gen_columns = (row_stride + channels - 1) / channels; - const size_t gen_size = rows * gen_columns; - - std::vector channel_input; - if(channel < active_channels) - { - channel_input = get_random_samples(gen_size, lower_level[channel], upper_level[channel]); - } - else - { - channel_input = get_random_samples(gen_size, lower_level[0], upper_level[0]); - } - // Interleave values - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < gen_columns; column++) - { - const size_t index = column * channels + channel; - if(index < row_stride) - { - input[row * row_stride + index] = channel_input[row * gen_columns + column]; - } - } - } - } - - sample_type * d_input; - counter_type * d_histogram[active_channels]; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - for(unsigned int channel = 0; channel < active_channels; channel++) - { - HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); - } - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector histogram_expected[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram_expected[channel] = std::vector(bins[channel], 0); - const level_type scale = (upper_level[channel] - lower_level[channel]) / bins[channel]; - - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) - { - const sample_type sample = input[row * row_stride + column * channels + channel]; - const level_type s = static_cast(sample); - if(s >= lower_level[channel] && s < upper_level[channel]) - { - const int bin = (s - lower_level[channel]) / scale; - histogram_expected[channel][bin]++; - } - } - } - } - - hipcub::TransformInputIterator, sample_type *> d_input2( - d_input, - transform_op() - ); - - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramEven( - nullptr, temporary_storage_bytes, - d_input2, - d_histogram, - num_levels, lower_level, upper_level, - int(columns), - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramEven( - nullptr, temporary_storage_bytes, - d_input2, - d_histogram, - num_levels, lower_level, upper_level, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - )); - } - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(rows == 1) - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramEven( - d_temporary_storage, temporary_storage_bytes, - d_input2, - d_histogram, - num_levels, lower_level, upper_level, - int(columns), - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramEven( - d_temporary_storage, temporary_storage_bytes, - d_input2, - d_histogram, - num_levels, lower_level, upper_level, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - )); - } - - std::vector histogram[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram[channel] = std::vector(bins[channel]); - HIP_CHECK( - hipMemcpy( - histogram[channel].data(), d_histogram[channel], - bins[channel] * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipFree(d_histogram[channel])); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); - - for(unsigned int channel = 0; channel < active_channels; channel++) - { - SCOPED_TRACE(testing::Message() << "with channel = " << channel); - - for(size_t i = 0; i < bins[channel]; i++) - { - ASSERT_EQ(histogram[channel][i], histogram_expected[channel][i]); - } - } - } -} - -template< - class SampleType, - unsigned int Channels, - unsigned int ActiveChannels, - unsigned int Bins, - int StartLevel = 0, - unsigned int MinBinWidth = 1, - unsigned int MaxBinWidth = 10, - class LevelType = SampleType, - class CounterType = int -> -struct params4 -{ - using sample_type = SampleType; - static constexpr unsigned int channels = Channels; - static constexpr unsigned int active_channels = ActiveChannels; - static constexpr unsigned int bins = Bins; - static constexpr int start_level = StartLevel; - static constexpr unsigned int min_bin_length = MinBinWidth; - static constexpr unsigned int max_bin_length = MaxBinWidth; - using level_type = LevelType; - using counter_type = CounterType; -}; - -template -class HipcubDeviceHistogramMultiRange : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params4, - params4, - params4, - params4, - params4, - - params4, - params4 -> Params4; - -TYPED_TEST_CASE(HipcubDeviceHistogramMultiRange, Params4); - -TYPED_TEST(HipcubDeviceHistogramMultiRange, MultiRange) -{ - using sample_type = typename TestFixture::params::sample_type; - using counter_type = typename TestFixture::params::counter_type; - using level_type = typename TestFixture::params::level_type; - constexpr unsigned int channels = TestFixture::params::channels; - constexpr unsigned int active_channels = TestFixture::params::active_channels; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - unsigned int bins[active_channels]; - int num_levels[active_channels]; - std::uniform_int_distribution bin_length_dis[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - // Use different ranges for different channels - bins[channel] = TestFixture::params::bins + channel; - num_levels[channel] = bins[channel] + 1; - bin_length_dis[channel] = std::uniform_int_distribution( - TestFixture::params::min_bin_length, - TestFixture::params::max_bin_length - ); - } - - for(auto dim : get_dims()) - { - SCOPED_TRACE( - testing::Message() << "with dim = {" << - std::get<0>(dim) << ", " << std::get<1>(dim) << ", " << std::get<2>(dim) << "}" - ); - - const size_t rows = std::get<0>(dim); - const size_t columns = std::get<1>(dim); - const size_t row_stride = columns * channels + std::get<2>(dim); - - const size_t row_stride_bytes = row_stride * sizeof(sample_type); - const size_t size = std::max(1, rows * row_stride); - - // Generate data - std::vector levels[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - level_type level = TestFixture::params::start_level; - for(unsigned int bin = 0 ; bin < bins[channel]; bin++) - { - levels[channel].push_back(level); - level += bin_length_dis[channel](gen); - } - levels[channel].push_back(level); - } - - std::vector input(size); - for(unsigned int channel = 0; channel < channels; channel++) - { - const size_t gen_columns = (row_stride + channels - 1) / channels; - const size_t gen_size = rows * gen_columns; - - std::vector channel_input; - if(channel < active_channels) - { - channel_input = get_random_samples( - gen_size, levels[channel][0], levels[channel][bins[channel]] - ); - } - else - { - channel_input = get_random_samples(gen_size, levels[0][0], levels[0][bins[0]]); - } - // Interleave values - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < gen_columns; column++) - { - const size_t index = column * channels + channel; - if(index < row_stride) - { - input[row * row_stride + index] = channel_input[row * gen_columns + column]; - } - } - } - } - - sample_type * d_input; - level_type * d_levels[active_channels]; - counter_type * d_histogram[active_channels]; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - for(unsigned int channel = 0; channel < active_channels; channel++) - { - HIP_CHECK(hipMalloc(&d_levels[channel], num_levels[channel] * sizeof(level_type))); - HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); - } - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - for(unsigned int channel = 0; channel < active_channels; channel++) - { - HIP_CHECK( - hipMemcpy( - d_levels[channel], levels[channel].data(), - num_levels[channel] * sizeof(level_type), - hipMemcpyHostToDevice - ) - ); - } - - // Calculate expected results on host - std::vector histogram_expected[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram_expected[channel] = std::vector(bins[channel], 0); - - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) - { - const sample_type sample = input[row * row_stride + column * channels + channel]; - const level_type s = static_cast(sample); - if(s >= levels[channel][0] && s < levels[channel][bins[channel]]) - { - const auto bin_iter = std::upper_bound(levels[channel].begin(), levels[channel].end(), s); - const int bin = bin_iter - levels[channel].begin() - 1; - histogram_expected[channel][bin]++; - } - } - } - } - - hipcub::TransformInputIterator, sample_type *> d_input2( - d_input, - transform_op() - ); - - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramRange( - nullptr, temporary_storage_bytes, - d_input2, d_histogram, - num_levels, d_levels, - int(columns), - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramRange( - nullptr, temporary_storage_bytes, - d_input2, d_histogram, - num_levels, d_levels, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - )); - } - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(rows == 1) - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramRange( - d_temporary_storage, temporary_storage_bytes, - d_input2, d_histogram, - num_levels, d_levels, - int(columns), - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - hipcub::DeviceHistogram::MultiHistogramRange( - d_temporary_storage, temporary_storage_bytes, - d_input2, d_histogram, - num_levels, d_levels, - int(columns), int(rows), row_stride_bytes, - stream, debug_synchronous - ) - )); - } - - std::vector histogram[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram[channel] = std::vector(bins[channel]); - HIP_CHECK( - hipMemcpy( - histogram[channel].data(), d_histogram[channel], - bins[channel] * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipFree(d_levels[channel])); - HIP_CHECK(hipFree(d_histogram[channel])); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); - - for(unsigned int channel = 0; channel < active_channels; channel++) - { - SCOPED_TRACE(testing::Message() << "with channel = " << channel); - - for(size_t i = 0; i < bins[channel]; i++) - { - ASSERT_EQ(histogram[channel][i], histogram_expected[channel][i]); - } - } - } -} diff --git a/test/hipcub/test_hipcub_device_radix_sort.cpp b/test/hipcub/test_hipcub_device_radix_sort.cpp deleted file mode 100644 index 8abd1bc58..000000000 --- a/test/hipcub/test_hipcub_device_radix_sort.cpp +++ /dev/null @@ -1,653 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Key, - class Value, - bool Descending = false, - unsigned int StartBit = 0, - unsigned int EndBit = sizeof(Key) * 8, - bool CheckHugeSizes = false -> -struct params -{ - using key_type = Key; - using value_type = Value; - static constexpr bool descending = Descending; - static constexpr unsigned int start_bit = StartBit; - static constexpr unsigned int end_bit = EndBit; - static constexpr bool check_huge_sizes = CheckHugeSizes; -}; - -template -class HipcubDeviceRadixSort : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params, - params, - params, - params, - params, - params, - params, - params>, - - // start_bit and end_bit - params, - params, - params, - params, - params, - params, - params, - - // huge sizes to check correctness of more than 1 block per batch - params -> Params; - -TYPED_TEST_CASE(HipcubDeviceRadixSort, Params); - -template -struct key_comparator -{ -private: - template - constexpr static bool all_bits() - { - return (CStartBit == 0 && CEndBit == sizeof(Key) * 8); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - return Descending ? (rhs < lhs) : (lhs < rhs); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - auto mask = (1ull << (EndBit - StartBit)) - 1; - auto l = (static_cast(lhs) >> StartBit) & mask; - auto r = (static_cast(rhs) >> StartBit) & mask; - return Descending ? (r < l) : (l < r); - } - -public: - static_assert( - key_comparator::all_bits() || std::is_unsigned::value, - "Test supports start and end bits only for unsigned integers" - ); - - bool operator()(const Key& lhs, const Key& rhs) - { - return this->compare(lhs, rhs); - } -}; - -template -struct key_value_comparator -{ - bool operator()(const std::pair& lhs, const std::pair& rhs) - { - return key_comparator()(lhs.first, rhs.first); - } -}; - -std::vector get_sizes() -{ - std::vector sizes = { 1, 10, 53, 211, 1024, 2345, 4096, 34567, (1 << 16) - 1220, (1 << 23) - 76543 }; - const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - return sizes; -} - -TYPED_TEST(HipcubDeviceRadixSort, SortKeys) -{ - using key_type = typename TestFixture::params::key_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr bool check_huge_sizes = TestFixture::params::check_huge_sizes; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - if(size > (1 << 20) && !check_huge_sizes) continue; - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - std::stable_sort(expected.begin(), expected.end(), key_comparator()); - - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeys( - nullptr, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeysDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeys( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_output)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i]); - } - } -} - -TYPED_TEST(HipcubDeviceRadixSort, SortPairs) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr bool check_huge_sizes = TestFixture::params::check_huge_sizes; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - if(size > (1 << 20) && !check_huge_sizes) continue; - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - std::stable_sort( - expected.begin(), expected.end(), - key_value_comparator() - ); - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairsDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_output)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i].first); - ASSERT_EQ(values_output[i], expected[i].second); - } - } -} - -TYPED_TEST(HipcubDeviceRadixSort, SortKeysDoubleBuffer) -{ - using key_type = typename TestFixture::params::key_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr bool check_huge_sizes = TestFixture::params::check_huge_sizes; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - if(size > (1 << 20) && !check_huge_sizes) continue; - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - std::stable_sort(expected.begin(), expected.end(), key_comparator()); - - hipcub::DoubleBuffer d_keys(d_keys_input, d_keys_output); - - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeys( - nullptr, temporary_storage_bytes, - d_keys, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeysDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortKeys( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.Current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i]); - } - } -} - -TYPED_TEST(HipcubDeviceRadixSort, SortPairsDoubleBuffer) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - constexpr bool check_huge_sizes = TestFixture::params::check_huge_sizes; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - if(size > (1 << 20) && !check_huge_sizes) continue; - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - std::stable_sort( - expected.begin(), expected.end(), - key_value_comparator() - ); - - hipcub::DoubleBuffer d_keys(d_keys_input, d_keys_output); - hipcub::DoubleBuffer d_values(d_values_input, d_values_output); - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairsDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.Current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values.Current(), - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_values_output)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i].first); - ASSERT_EQ(values_output[i], expected[i].second); - } - } -} diff --git a/test/hipcub/test_hipcub_device_reduce.cpp b/test/hipcub/test_hipcub_device_reduce.cpp deleted file mode 100644 index 6d9d5d963..000000000 --- a/test/hipcub/test_hipcub_device_reduce.cpp +++ /dev/null @@ -1,434 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include - -// Google Test -#include - -// HIP API -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Params for tests -template< - class InputType, - class OutputType = InputType -> -struct DeviceReduceParams -{ - using input_type = InputType; - using output_type = OutputType; -}; - -// --------------------------------------------------------- -// Test for reduction ops taking single input value -// --------------------------------------------------------- - -template -class HipcubDeviceReduceTests : public ::testing::Test -{ -public: - using input_type = typename Params::input_type; - using output_type = typename Params::output_type; - const bool debug_synchronous = false; -}; - -typedef ::testing::Types< - DeviceReduceParams, - DeviceReduceParams, - DeviceReduceParams, - DeviceReduceParams - #ifdef HIPCUB_ROCPRIM_API - , - DeviceReduceParams, test_utils::custom_test_type>, - DeviceReduceParams, test_utils::custom_test_type> - #endif - -> HipcubDeviceReduceTestsParams; - -std::vector get_sizes() -{ - std::vector sizes = { - 1, 10, 53, 211, - 1024, 2048, 5096, - 34567, (1 << 17) - 1220 - }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - std::sort(sizes.begin(), sizes.end()); - return sizes; -} - -TYPED_TEST_CASE(HipcubDeviceReduceTests, HipcubDeviceReduceTestsParams); - -TYPED_TEST(HipcubDeviceReduceTests, Reduce) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector output(1, 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - U expected = U(0); - for(unsigned int i = 0; i < input.size(); i++) - { - expected = expected + input[i]; - } - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceReduce::Sum( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceReduce::Sum( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} - -TYPED_TEST(HipcubDeviceReduceTests, ReduceMinimum) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector output(1, 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - hipcub::Min min_op; - // Calculate expected results on host - U expected = U(std::numeric_limits::max()); - for(unsigned int i = 0; i < input.size(); i++) - { - expected = min_op(expected, U(input[i])); - } - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceReduce::Min( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceReduce::Min( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} - -TYPED_TEST(HipcubDeviceReduceTests, ReduceArgMinimum) -{ - using T = typename TestFixture::input_type; - using Iterator = typename hipcub::ArgIndexInputIterator; - using key_value = typename Iterator::value_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 200); - std::vector output(1); - - T * d_input; - key_value * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - Iterator x(input.data()); - const key_value max(1, std::numeric_limits::max()); - key_value expected = std::accumulate(x, x + size, max, hipcub::ArgMin()); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceReduce::ArgMin( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceReduce::ArgMin( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(key_value), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0].key, expected.key, 0.01f)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0].value, expected.value, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} - -TYPED_TEST(HipcubDeviceReduceTests, ReduceArgMaximum) -{ - using T = typename TestFixture::input_type; - using Iterator = typename hipcub::ArgIndexInputIterator; - using key_value = typename Iterator::value_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 100); - std::vector output(1); - - T * d_input; - key_value * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - Iterator x(input.data()); - const key_value min(1, std::numeric_limits::lowest()); - key_value expected = std::accumulate(x, x + size, min, hipcub::ArgMax()); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceReduce::ArgMax( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceReduce::ArgMax( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(key_value), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_EQ(output[0].key, expected.key); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0].value, expected.value, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} diff --git a/test/hipcub/test_hipcub_device_reduce_by_key.cpp b/test/hipcub/test_hipcub_device_reduce_by_key.cpp deleted file mode 100644 index 921d56463..000000000 --- a/test/hipcub/test_hipcub_device_reduce_by_key.cpp +++ /dev/null @@ -1,282 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// HIP API -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Key, - class Value, - class ReduceOp, - unsigned int MinSegmentLength, - unsigned int MaxSegmentLength, - class Aggregate = Value -> -struct params -{ - using key_type = Key; - using value_type = Value; - using reduce_op_type = ReduceOp; - static constexpr unsigned int min_segment_length = MinSegmentLength; - static constexpr unsigned int max_segment_length = MaxSegmentLength; - using aggregate_type = Aggregate; -}; - -template -class HipcubDeviceReduceByKey : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params -> Params; - -TYPED_TEST_CASE(HipcubDeviceReduceByKey, Params); - -std::vector get_sizes() -{ - std::vector sizes = { - 1024, 2048, 4096, 1792, - 1, 10, 53, 211, 500, - 2345, 11001, 34567, - 100000, - (1 << 16) - 1220, (1 << 23) - 76543 - }; - const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - return sizes; -} - -TYPED_TEST(HipcubDeviceReduceByKey, ReduceByKey) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - using aggregate_type = typename TestFixture::params::aggregate_type; - using reduce_op_type = typename TestFixture::params::reduce_op_type; - using key_distribution_type = typename std::conditional< - std::is_floating_point::value, - std::uniform_real_distribution, - std::uniform_int_distribution - >::type; - - const bool debug_synchronous = false; - - reduce_op_type reduce_op; - hipcub::Equality key_compare_op; - - const std::vector sizes = get_sizes(); - - const unsigned int seed = 123; - std::default_random_engine gen(seed); - - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector unique_expected; - std::vector aggregates_expected; - size_t unique_count_expected = 0; - - std::vector keys_input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - size_t offset = 0; - key_type current_key = key_distribution_type(0, 100)(gen); - key_type prev_key = current_key; - while(offset < size) - { - const size_t key_count = key_count_dis(gen); - current_key += key_delta_dis(gen); - - const size_t end = std::min(size, offset + key_count); - for(size_t i = offset; i < end; i++) - { - keys_input[i] = current_key; - } - aggregate_type aggregate = values_input[offset]; - for(size_t i = offset + 1; i < end; i++) - { - aggregate = reduce_op(aggregate, static_cast(values_input[i])); - } - - // The first key of the segment must be written into unique - // (it may differ from other keys in case of custom key compraison operators) - if(unique_count_expected == 0 || !key_compare_op(prev_key, current_key)) - { - unique_expected.push_back(current_key); - unique_count_expected++; - aggregates_expected.push_back(aggregate); - } - else - { - aggregates_expected.back() = reduce_op(aggregates_expected.back(), aggregate); - } - - prev_key = current_key; - offset += key_count; - } - - key_type * d_keys_input; - value_type * d_values_input; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - key_type * d_unique_output; - aggregate_type * d_aggregates_output; - unsigned int * d_unique_count_output; - HIP_CHECK(hipMalloc(&d_unique_output, unique_count_expected * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_aggregates_output, unique_count_expected * sizeof(aggregate_type))); - HIP_CHECK(hipMalloc(&d_unique_count_output, sizeof(unsigned int))); - - size_t temporary_storage_bytes = 0; - - HIP_CHECK( - hipcub::DeviceReduce::ReduceByKey( - nullptr, temporary_storage_bytes, - d_keys_input, d_unique_output, - d_values_input, d_aggregates_output, - d_unique_count_output, - reduce_op, size, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceReduce::ReduceByKey( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_unique_output, - d_values_input, d_aggregates_output, - d_unique_count_output, - reduce_op, size, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector unique_output(unique_count_expected); - std::vector aggregates_output(unique_count_expected); - std::vector unique_count_output(1); - HIP_CHECK( - hipMemcpy( - unique_output.data(), d_unique_output, - unique_count_expected * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - unique_count_expected * sizeof(aggregate_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - unique_count_output.data(), d_unique_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_unique_output)); - HIP_CHECK(hipFree(d_aggregates_output)); - HIP_CHECK(hipFree(d_unique_count_output)); - - ASSERT_EQ(unique_count_output[0], unique_count_expected); - - // Validating results - for(size_t i = 0; i < unique_count_expected; i++) - { - ASSERT_EQ(unique_output[i], unique_expected[i]); - if(std::is_integral::value) - { - ASSERT_EQ(aggregates_output[i], aggregates_expected[i]); - } - else if (std::is_floating_point::value) - { - auto tolerance = std::max( - std::abs(0.1f * aggregates_expected[i]), aggregate_type(0.01f) - ); - ASSERT_NEAR(aggregates_output[i], aggregates_expected[i], tolerance); - } - } - } -} diff --git a/test/hipcub/test_hipcub_device_run_length_encode.cpp b/test/hipcub/test_hipcub_device_run_length_encode.cpp deleted file mode 100644 index 0dc8e2dd2..000000000 --- a/test/hipcub/test_hipcub_device_run_length_encode.cpp +++ /dev/null @@ -1,404 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// CUB's implementation of DeviceRunLengthEncode has unused parameters, -// disable the warning because all warnings are threated as errors: -#ifdef __HIP_PLATFORM_NVCC__ - #pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -#include -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// HIP API -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Key, - class Count, - unsigned int MinSegmentLength, - unsigned int MaxSegmentLength -> -struct params -{ - using key_type = Key; - using count_type = Count; - static constexpr unsigned int min_segment_length = MinSegmentLength; - static constexpr unsigned int max_segment_length = MaxSegmentLength; -}; - -template -class HipcubDeviceRunLengthEncode : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params, - params -> Params; - -TYPED_TEST_CASE(HipcubDeviceRunLengthEncode, Params); - -std::vector get_sizes() -{ - std::vector sizes = { - 1024, 2048, 4096, 1792, - 1, 10, 53, 211, 500, - 2345, 11001, 34567, - 100000, - (1 << 16) - 1220, (1 << 21) - 76543 - }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - return sizes; -} - -TYPED_TEST(HipcubDeviceRunLengthEncode, Encode) -{ - using key_type = typename TestFixture::params::key_type; - using count_type = typename TestFixture::params::count_type; - using key_distribution_type = typename std::conditional< - std::is_floating_point::value, - std::uniform_real_distribution, - std::uniform_int_distribution - >::type; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - - const unsigned int seed = 123; - std::default_random_engine gen(seed); - - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector unique_expected; - std::vector counts_expected; - size_t runs_count_expected = 0; - - std::vector input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - size_t offset = 0; - key_type current_key = key_distribution_type(0, 100)(gen); - while(offset < size) - { - size_t key_count = key_count_dis(gen); - current_key += key_delta_dis(gen); - - const size_t end = std::min(size, offset + key_count); - key_count = end - offset; - for(size_t i = offset; i < end; i++) - { - input[i] = current_key; - } - - unique_expected.push_back(current_key); - runs_count_expected++; - counts_expected.push_back(key_count); - - offset += key_count; - } - - key_type * d_input; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - key_type * d_unique_output; - count_type * d_counts_output; - count_type * d_runs_count_output; - HIP_CHECK(hipMalloc(&d_unique_output, runs_count_expected * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_counts_output, runs_count_expected * sizeof(count_type))); - HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); - - size_t temporary_storage_bytes = 0; - - HIP_CHECK( - hipcub::DeviceRunLengthEncode::Encode( - nullptr, temporary_storage_bytes, - d_input, - d_unique_output, d_counts_output, d_runs_count_output, - size, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceRunLengthEncode::Encode( - d_temporary_storage, temporary_storage_bytes, - d_input, - d_unique_output, d_counts_output, d_runs_count_output, - size, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector unique_output(runs_count_expected); - std::vector counts_output(runs_count_expected); - std::vector runs_count_output(1); - HIP_CHECK( - hipMemcpy( - unique_output.data(), d_unique_output, - runs_count_expected * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - counts_output.data(), d_counts_output, - runs_count_expected * sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - runs_count_output.data(), d_runs_count_output, - sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_unique_output)); - HIP_CHECK(hipFree(d_counts_output)); - HIP_CHECK(hipFree(d_runs_count_output)); - - // Validating results - - ASSERT_EQ(runs_count_output[0], static_cast(runs_count_expected)); - - for(size_t i = 0; i < runs_count_expected; i++) - { - ASSERT_EQ(unique_output[i], unique_expected[i]); - ASSERT_EQ(counts_output[i], counts_expected[i]); - } - } -} - -TYPED_TEST(HipcubDeviceRunLengthEncode, NonTrivialRuns) -{ - using key_type = typename TestFixture::params::key_type; - using count_type = typename TestFixture::params::count_type; - using offset_type = typename TestFixture::params::count_type; - using key_distribution_type = typename std::conditional< - std::is_floating_point::value, - std::uniform_real_distribution, - std::uniform_int_distribution - >::type; - - const bool debug_synchronous = false; - - const std::vector sizes = get_sizes(); - - const unsigned int seed = 123; - std::default_random_engine gen(seed); - - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector offsets_expected; - std::vector counts_expected; - size_t runs_count_expected = 0; - - std::vector input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::bernoulli_distribution is_trivial_dis(0.1); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - size_t offset = 0; - key_type current_key = key_distribution_type(0, 100)(gen); - while(offset < size) - { - size_t key_count; - if(TestFixture::params::min_segment_length == 1 && is_trivial_dis(gen)) - { - // Increased probability of trivial runs for long segments - key_count = 1; - } - else - { - key_count = key_count_dis(gen); - } - current_key += key_delta_dis(gen); - - const size_t end = std::min(size, offset + key_count); - key_count = end - offset; - for(size_t i = offset; i < end; i++) - { - input[i] = current_key; - } - - if(key_count > 1) - { - offsets_expected.push_back(offset); - runs_count_expected++; - counts_expected.push_back(key_count); - } - - offset += key_count; - } - - key_type * d_input; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets_output; - count_type * d_counts_output; - count_type * d_runs_count_output; - HIP_CHECK(hipMalloc(&d_offsets_output, std::max(1, runs_count_expected) * sizeof(offset_type))); - HIP_CHECK(hipMalloc(&d_counts_output, std::max(1, runs_count_expected) * sizeof(count_type))); - HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); - - size_t temporary_storage_bytes = 0; - - HIP_CHECK( - hipcub::DeviceRunLengthEncode::NonTrivialRuns( - nullptr, temporary_storage_bytes, - d_input, - d_offsets_output, d_counts_output, d_runs_count_output, - size, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceRunLengthEncode::NonTrivialRuns( - d_temporary_storage, temporary_storage_bytes, - d_input, - d_offsets_output, d_counts_output, d_runs_count_output, - size, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector offsets_output(runs_count_expected); - std::vector counts_output(runs_count_expected); - std::vector runs_count_output(1); - if(runs_count_expected > 0) - { - HIP_CHECK( - hipMemcpy( - offsets_output.data(), d_offsets_output, - runs_count_expected * sizeof(offset_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - counts_output.data(), d_counts_output, - runs_count_expected * sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - } - HIP_CHECK( - hipMemcpy( - runs_count_output.data(), d_runs_count_output, - sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_offsets_output)); - HIP_CHECK(hipFree(d_counts_output)); - HIP_CHECK(hipFree(d_runs_count_output)); - - // Validating results - - ASSERT_EQ(runs_count_output[0], static_cast(runs_count_expected)); - - for(size_t i = 0; i < runs_count_expected; i++) - { - ASSERT_EQ(offsets_output[i], offsets_expected[i]); - ASSERT_EQ(counts_output[i], counts_expected[i]); - } - } -} diff --git a/test/hipcub/test_hipcub_device_scan.cpp b/test/hipcub/test_hipcub_device_scan.cpp deleted file mode 100644 index 3b606759d..000000000 --- a/test/hipcub/test_hipcub_device_scan.cpp +++ /dev/null @@ -1,272 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include - -// Google Test -#include -// HIP API -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Params for tests -template< - class InputType, - class OutputType = InputType -> -struct DeviceScanParams -{ - using input_type = InputType; - using output_type = OutputType; -}; - -// --------------------------------------------------------- -// Test for scan ops taking single input value -// --------------------------------------------------------- - -template -class HipcubDeviceScanTests : public ::testing::Test -{ -public: - using input_type = typename Params::input_type; - using output_type = typename Params::output_type; - const bool debug_synchronous = false; -}; - -typedef ::testing::Types< - DeviceScanParams, - DeviceScanParams, - DeviceScanParams, - DeviceScanParams -> HipcubDeviceScanTestsParams; - -std::vector get_sizes() -{ - std::vector sizes = { - 1, 10, 53, 211, - 1024, 2048, 5096, - 34567, (1 << 18) - 1220 - }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - std::sort(sizes.begin(), sizes.end()); - return sizes; -} - -TYPED_TEST_CASE(HipcubDeviceScanTests, HipcubDeviceScanTestsParams); - -TYPED_TEST(HipcubDeviceScanTests, InclusiveScanSum) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 1); - std::vector output(input.size(), 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - hipcub::Sum sum_op; - - // Calculate expected results on host - std::vector expected(input.size()); - test_utils::host_inclusive_scan( - input.begin(), input.end(), - expected.begin(), sum_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceScan::InclusiveScan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, sum_op, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceScan::InclusiveScan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, sum_op, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - for(size_t i = 0; i < output.size(); i++) - { - auto diff = std::max(std::abs(0.01f * expected[i]), U(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff) << "where index = " << i; - } - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} - -TYPED_TEST(HipcubDeviceScanTests, ExclusiveScanSum) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); - std::vector output(input.size()); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - hipcub::Sum sum_op; - - // Calculate expected results on host - std::vector expected(input.size()); - T initial_value = test_utils::get_random_value(1, 100); - test_utils::host_exclusive_scan( - input.begin(), input.end(), - initial_value, expected.begin(), - sum_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceScan::ExclusiveScan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, sum_op, initial_value, input.size(), - stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceScan::ExclusiveScan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, sum_op, initial_value, input.size(), - stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - for(size_t i = 0; i < output.size(); i++) - { - auto diff = std::max(std::abs(0.01f * expected[i]), U(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff) << "where index = " << i; - } - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); - } -} diff --git a/test/hipcub/test_hipcub_device_segmented_radix_sort.cpp b/test/hipcub/test_hipcub_device_segmented_radix_sort.cpp deleted file mode 100644 index ad80ae3ef..000000000 --- a/test/hipcub/test_hipcub_device_segmented_radix_sort.cpp +++ /dev/null @@ -1,808 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class Key, - class Value, - bool Descending, - unsigned int StartBit, - unsigned int EndBit, - unsigned int MinSegmentLength, - unsigned int MaxSegmentLength -> -struct params -{ - using key_type = Key; - using value_type = Value; - static constexpr bool descending = Descending; - static constexpr unsigned int start_bit = StartBit; - static constexpr unsigned int end_bit = EndBit; - static constexpr unsigned int min_segment_length = MinSegmentLength; - static constexpr unsigned int max_segment_length = MaxSegmentLength; -}; - -template -class HipcubDeviceSegmentedRadixSort : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params, - params, - params, - params, - params, - params, - params, - - // start_bit and end_bit - params, - params, - params, - params, - params, - params, - params -> Params; - -TYPED_TEST_CASE(HipcubDeviceSegmentedRadixSort, Params); - -template -struct key_comparator -{ -private: - template - constexpr static bool all_bits() - { - return (CStartBit == 0 && CEndBit == sizeof(Key) * 8); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - return Descending ? (rhs < lhs) : (lhs < rhs); - } - - template - auto compare(const Key& lhs, const Key& rhs) const - -> typename std::enable_if(), bool>::type - { - auto mask = (1ull << (EndBit - StartBit)) - 1; - auto l = (static_cast(lhs) >> StartBit) & mask; - auto r = (static_cast(rhs) >> StartBit) & mask; - return Descending ? (r < l) : (l < r); - } - -public: - static_assert( - key_comparator::all_bits() || std::is_unsigned::value, - "Test supports start and end bits only for unsigned integers" - ); - - bool operator()(const Key& lhs, const Key& rhs) - { - return this->compare(lhs, rhs); - } -}; - -template -struct key_value_comparator -{ - bool operator()(const std::pair& lhs, const std::pair& rhs) - { - return key_comparator()(lhs.first, rhs.first); - } -}; - -std::vector get_sizes() -{ - std::vector sizes = { - 1024, 2048, 4096, 1792, - 1, 10, 53, 211, 500, - 2345, 11001, 34567, - 1000000, - (1 << 16) - 1220 - }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - return sizes; -} - -TYPED_TEST(HipcubDeviceSegmentedRadixSort, SortKeys) -{ - using key_type = typename TestFixture::params::key_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - - using offset_type = unsigned int; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_comparator() - ); - } - - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeys( - nullptr, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeysDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeys( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_offsets)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i]); - } - } -} - -TYPED_TEST(HipcubDeviceSegmentedRadixSort, SortPairs) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - - using offset_type = unsigned int; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_value_comparator() - ); - } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairsDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_output)); - HIP_CHECK(hipFree(d_offsets)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i].first); - ASSERT_EQ(values_output[i], expected[i].second); - } - } -} - -TYPED_TEST(HipcubDeviceSegmentedRadixSort, SortKeysDoubleBuffer) -{ - using key_type = typename TestFixture::params::key_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - - using offset_type = unsigned int; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_comparator() - ); - } - - hipcub::DoubleBuffer d_keys(d_keys_input, d_keys_output); - - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeys( - nullptr, temporary_storage_bytes, - d_keys, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeysDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortKeys( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.Current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_offsets)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i]); - } - } -} - -TYPED_TEST(HipcubDeviceSegmentedRadixSort, SortPairsDoubleBuffer) -{ - using key_type = typename TestFixture::params::key_type; - using value_type = typename TestFixture::params::value_type; - constexpr bool descending = TestFixture::params::descending; - constexpr unsigned int start_bit = TestFixture::params::start_bit; - constexpr unsigned int end_bit = TestFixture::params::end_bit; - - using offset_type = unsigned int; - - hipStream_t stream = 0; - - const bool debug_synchronous = false; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(std::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_value_comparator() - ); - } - - hipcub::DoubleBuffer d_keys(d_keys_input, d_keys_output); - hipcub::DoubleBuffer d_values(d_values_input, d_values_output); - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairsDescending( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - hipcub::DeviceSegmentedRadixSort::SortPairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous - ) - ); - } - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.Current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values.Current(), - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_values_output)); - HIP_CHECK(hipFree(d_offsets)); - - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(keys_output[i], expected[i].first); - ASSERT_EQ(values_output[i], expected[i].second); - } - } -} diff --git a/test/hipcub/test_hipcub_device_segmented_reduce.cpp b/test/hipcub/test_hipcub_device_segmented_reduce.cpp deleted file mode 100644 index 289da58ae..000000000 --- a/test/hipcub/test_hipcub_device_segmented_reduce.cpp +++ /dev/null @@ -1,518 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -std::vector get_sizes() -{ - std::vector sizes = { - 1024, 2048, 4096, 1792, - 1, 10, 53, 211, 500, - 2345, 11001, 34567, - 100000, - (1 << 16) - 1220 - }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 1000000); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - return sizes; -} - -template< - class Input, - class Output, - class ReduceOp = hipcub::Sum, - int Init = 0, // as only integral types supported, int is used here even for floating point inputs - unsigned int MinSegmentLength = 0, - unsigned int MaxSegmentLength = 1000 -> -struct params1 -{ - using input_type = Input; - using output_type = Output; - using reduce_op_type = ReduceOp; - static constexpr input_type init = Init; - static constexpr unsigned int min_segment_length = MinSegmentLength; - static constexpr unsigned int max_segment_length = MaxSegmentLength; -}; - -template -class HipcubDeviceSegmentedReduceOp : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params1, - params1, - params1, - params1, - params1, - params1 -> Params1; - -TYPED_TEST_CASE(HipcubDeviceSegmentedReduceOp, Params1); - -TYPED_TEST(HipcubDeviceSegmentedReduceOp, Reduce) -{ - using input_type = typename TestFixture::params::input_type; - using output_type = typename TestFixture::params::output_type; - using reduce_op_type = typename TestFixture::params::reduce_op_type; - - using result_type = output_type; - using offset_type = unsigned int; - - constexpr input_type init = TestFixture::params::init; - const bool debug_synchronous = false; - reduce_op_type reduce_op; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector aggregates_expected; - - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = init; - for(size_t i = offset; i < end; i++) - { - aggregate = reduce_op(aggregate, static_cast(values_input[i])); - } - aggregates_expected.push_back(aggregate); - - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - input_type * d_values_input; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - output_type * d_aggregates_output; - HIP_CHECK(hipMalloc(&d_aggregates_output, segments_count * sizeof(output_type))); - - size_t temporary_storage_bytes; - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Reduce( - nullptr, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - reduce_op, init, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Reduce( - d_temporary_storage, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - reduce_op, init, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector aggregates_output(segments_count); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - segments_count * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_aggregates_output)); - - for(size_t i = 0; i < segments_count; i++) - { - if(std::is_integral::value) - { - ASSERT_EQ(aggregates_output[i], aggregates_expected[i]); - } - else - { - auto diff = std::max( - std::abs(0.01 * aggregates_expected[i]), output_type(0.01) - ); - ASSERT_NEAR(aggregates_output[i], aggregates_expected[i], diff); - } - } - } -} - -template< - class Input, - class Output, - unsigned int MinSegmentLength = 0, - unsigned int MaxSegmentLength = 1000 -> -struct params2 -{ - using input_type = Input; - using output_type = Output; - static constexpr unsigned int min_segment_length = MinSegmentLength; - static constexpr unsigned int max_segment_length = MaxSegmentLength; -}; - -template -class HipcubDeviceSegmentedReduce : public ::testing::Test { -public: - using params = Params; -}; - -typedef ::testing::Types< - params2, - params2, - params2, - params2, - params2, - params2 -> Params2; - -TYPED_TEST_CASE(HipcubDeviceSegmentedReduce, Params2); - -TYPED_TEST(HipcubDeviceSegmentedReduce, Sum) -{ - using input_type = typename TestFixture::params::input_type; - using output_type = typename TestFixture::params::output_type; - using reduce_op_type = typename hipcub::Sum; - using result_type = output_type; - using offset_type = unsigned int; - - constexpr input_type init = input_type(0); - const bool debug_synchronous = false; - reduce_op_type reduce_op; - - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector aggregates_expected; - - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = init; - for(size_t i = offset; i < end; i++) - { - aggregate = reduce_op(aggregate, static_cast(values_input[i])); - } - aggregates_expected.push_back(aggregate); - - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - input_type * d_values_input; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - output_type * d_aggregates_output; - HIP_CHECK(hipMalloc(&d_aggregates_output, segments_count * sizeof(output_type))); - - size_t temporary_storage_bytes; - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Sum( - nullptr, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Sum( - d_temporary_storage, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector aggregates_output(segments_count); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - segments_count * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_aggregates_output)); - - for(size_t i = 0; i < segments_count; i++) - { - if(std::is_integral::value) - { - ASSERT_EQ(aggregates_output[i], aggregates_expected[i]); - } - else - { - auto diff = std::max( - std::abs(0.01 * aggregates_expected[i]), output_type(0.01) - ); - ASSERT_NEAR(aggregates_output[i], aggregates_expected[i], diff); - } - } - } -} - -TYPED_TEST(HipcubDeviceSegmentedReduce, Min) -{ - using input_type = typename TestFixture::params::input_type; - using output_type = typename TestFixture::params::output_type; - using reduce_op_type = typename hipcub::Min; - using result_type = output_type; - using offset_type = unsigned int; - - constexpr input_type init = std::numeric_limits::max(); - const bool debug_synchronous = false; - reduce_op_type reduce_op; - - std::random_device rd; - std::default_random_engine gen(rd()); - - std::uniform_int_distribution segment_length_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - - const std::vector sizes = get_sizes(); - for(size_t size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector aggregates_expected; - - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = init; - for(size_t i = offset; i < end; i++) - { - aggregate = reduce_op(aggregate, static_cast(values_input[i])); - } - aggregates_expected.push_back(aggregate); - - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - input_type * d_values_input; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - output_type * d_aggregates_output; - HIP_CHECK(hipMalloc(&d_aggregates_output, segments_count * sizeof(output_type))); - - size_t temporary_storage_bytes; - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Min( - nullptr, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - hipcub::DeviceSegmentedReduce::Min( - d_temporary_storage, temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector aggregates_output(segments_count); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - segments_count * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_aggregates_output)); - - for(size_t i = 0; i < segments_count; i++) - { - ASSERT_EQ(aggregates_output[i], aggregates_expected[i]); - } - } -} diff --git a/test/hipcub/test_hipcub_device_select.cpp b/test/hipcub/test_hipcub_device_select.cpp deleted file mode 100644 index 13165f675..000000000 --- a/test/hipcub/test_hipcub_device_select.cpp +++ /dev/null @@ -1,479 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include - -// Google Test -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(static_cast(error), hipSuccess) - -// Params for tests -template< - class InputType, - class OutputType = InputType, - class FlagType = unsigned int -> -struct DeviceSelectParams -{ - using input_type = InputType; - using output_type = OutputType; - using flag_type = FlagType; -}; - -template -class HipcubDeviceSelectTests : public ::testing::Test -{ -public: - using input_type = typename Params::input_type; - using output_type = typename Params::output_type; - using flag_type = typename Params::flag_type; - const bool debug_synchronous = false; -}; - -typedef ::testing::Types< - DeviceSelectParams, - DeviceSelectParams -> HipcubDeviceSelectTestsParams; - -std::vector get_sizes() -{ - std::vector sizes = { - 2, 32, 64, 256, - 1024, 2048, - 3072, 4096, - 27845, (1 << 18) + 1111 - }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - std::sort(sizes.begin(), sizes.end()); - return sizes; -} - -TYPED_TEST_CASE(HipcubDeviceSelectTests, HipcubDeviceSelectTestsParams); - -TYPED_TEST(HipcubDeviceSelectTests, Flagged) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - using F = typename TestFixture::flag_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - hipStream_t stream = 0; // default stream - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector flags = test_utils::get_random_data(size, 0, 1); - - T * d_input; - F * d_flags; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_flags, flags.data(), - flags.size() * sizeof(F), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - for(size_t i = 0; i < input.size(); i++) - { - if(flags[i] != 0) - { - expected.push_back(input[i]); - } - } - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceSelect::Flagged( - nullptr, - temp_storage_size_bytes, - d_input, - d_flags, - d_output, - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceSelect::Flagged( - d_temp_storage, - temp_storage_size_bytes, - d_input, - d_flags, - d_output, - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - for(size_t i = 0; i < expected.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - - hipFree(d_input); - hipFree(d_flags); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); - } -} - -struct TestSelectOp -{ - template - __host__ __device__ inline - bool operator()(const T& value) const - { - if(value > T(50)) return true; - return false; - } -}; - -TYPED_TEST(HipcubDeviceSelectTests, SelectOp) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - hipStream_t stream = 0; // default stream - - TestSelectOp select_op; - - const std::vector sizes = get_sizes(); - for(auto size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 100); - - T * d_input; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - for(size_t i = 0; i < input.size(); i++) - { - if(select_op(input[i])) - { - expected.push_back(input[i]); - } - } - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceSelect::If( - nullptr, - temp_storage_size_bytes, - d_input, - d_output, - d_selected_count_output, - input.size(), - select_op, - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceSelect::If( - d_temp_storage, - temp_storage_size_bytes, - d_input, - d_output, - d_selected_count_output, - input.size(), - select_op, - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - for(size_t i = 0; i < expected.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - - hipFree(d_input); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); - } -} - -std::vector get_discontinuity_probabilities() -{ - std::vector probabilities = { - 0.5, 0.25, 0.5, 0.75, 0.95 - }; - return probabilities; -} - -TYPED_TEST(HipcubDeviceSelectTests, Unique) -{ - using T = typename TestFixture::input_type; - using U = typename TestFixture::output_type; - const bool debug_synchronous = TestFixture::debug_synchronous; - - hipStream_t stream = 0; // default stream - - const auto sizes = get_sizes(); - const auto probabilities = get_discontinuity_probabilities(); - for(auto size : sizes) - { - SCOPED_TRACE(testing::Message() << "with size = " << size); - for(auto p : probabilities) - { - SCOPED_TRACE(testing::Message() << "with p = " << p); - - // Generate data - std::vector input(size); - { - std::vector input01 = test_utils::get_random_data01(size, p); - test_utils::host_inclusive_scan( - input01.begin(), input01.end(), input.begin(), hipcub::Sum() - ); - } - - // Allocate and copy to device - T * d_input; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - expected.push_back(input[0]); - for(size_t i = 1; i < input.size(); i++) - { - if(!(input[i-1] == input[i])) - { - expected.push_back(input[i]); - } - } - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - hipcub::DeviceSelect::Unique( - nullptr, - temp_storage_size_bytes, - d_input, - d_output, - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0U); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - hipcub::DeviceSelect::Unique( - d_temp_storage, - temp_storage_size_bytes, - d_input, - d_output, - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - for(size_t i = 0; i < expected.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - - hipFree(d_input); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); - } - } -} diff --git a/test/hipcub/test_hipcub_util_ptx.cpp b/test/hipcub/test_hipcub_util_ptx.cpp deleted file mode 100644 index 5dd7e4f43..000000000 --- a/test/hipcub/test_hipcub_util_ptx.cpp +++ /dev/null @@ -1,615 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Custom structure -struct custom_notaligned -{ - short i; - double d; - float f; - unsigned int u; - - HIPCUB_HOST_DEVICE - custom_notaligned() {}; - HIPCUB_HOST_DEVICE - ~custom_notaligned() {}; -}; - -HIPCUB_HOST_DEVICE -inline bool operator==(const custom_notaligned& lhs, - const custom_notaligned& rhs) -{ - return lhs.i == rhs.i && lhs.d == rhs.d - && lhs.f == rhs.f &&lhs.u == rhs.u; -} - -// Custom structure aligned to 16 bytes -struct custom_16aligned -{ - int i; - unsigned int u; - float f; - - HIPCUB_HOST_DEVICE - custom_16aligned() {}; - HIPCUB_HOST_DEVICE - ~custom_16aligned() {}; -} __attribute__((aligned(16))); - -inline HIPCUB_HOST_DEVICE -bool operator==(const custom_16aligned& lhs, const custom_16aligned& rhs) -{ - return lhs.i == rhs.i && lhs.f == rhs.f && lhs.u == rhs.u; -} - -// Params for tests -template -struct params -{ - using type = T; - static constexpr unsigned int logical_warp_size = LogicalWarpSize; -}; - -template -class HipcubUtilPtxTests : public ::testing::Test -{ -public: - using type = typename Params::type; - static constexpr unsigned int logical_warp_size = Params::logical_warp_size; -}; - -typedef ::testing::Types< - params, - params, - params, - params, - params, - params, - params, - params -> UtilPtxTestParams; - -TYPED_TEST_CASE(HipcubUtilPtxTests, UtilPtxTestParams); - -template -__global__ -void shuffle_up_kernel(T* data, unsigned int src_offset) -{ - const unsigned int index = (hipBlockIdx_x * hipBlockDim_x) + hipThreadIdx_x; - T value = data[index]; - - // first_thread argument is ignored in hipCUB with rocPRIM-backend - const unsigned int first_thread = 0; - // Using mask is not supported in rocPRIM, so we don't test other masks - const unsigned int member_mask = 0xffffffff; - value = hipcub::ShuffleUp( - value, src_offset, first_thread, member_mask - ); - - data[index] = value; -} - -TYPED_TEST(HipcubUtilPtxTests, ShuffleUp) -{ - using T = typename TestFixture::type; - constexpr unsigned int logical_warp_size = TestFixture::logical_warp_size; - const size_t hardware_warp_size = HIPCUB_WARP_THREADS; - const size_t size = hardware_warp_size; - - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - auto src_offsets = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) - ); - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(auto src_offset : src_offsets) - { - SCOPED_TRACE(testing::Message() << "where src_offset = " << src_offset); - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - auto up_index = j > src_offset-1 ? index-src_offset : index; - expected[index] = input[up_index]; - } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, src_offset - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - } - hipFree(device_data); -} - -template -__global__ -void shuffle_down_kernel(T* data, unsigned int src_offset) -{ - const unsigned int index = (hipBlockIdx_x * hipBlockDim_x) + hipThreadIdx_x; - T value = data[index]; - - // last_thread argument is ignored in hipCUB with rocPRIM-backend - const unsigned int last_thread = LOGICAL_WARP_THREADS - 1; - // Using mask is not supported in rocPRIM, so we don't test other masks - const unsigned int member_mask = 0xffffffff; - value = hipcub::ShuffleDown( - value, src_offset, last_thread, member_mask - ); - - data[index] = value; -} - -TYPED_TEST(HipcubUtilPtxTests, ShuffleDown) -{ - using T = typename TestFixture::type; - constexpr unsigned int logical_warp_size = TestFixture::logical_warp_size; - const size_t hardware_warp_size = HIPCUB_WARP_THREADS; - const size_t size = hardware_warp_size; - - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - auto src_offsets = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) - ); - - T * device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(auto src_offset : src_offsets) - { - SCOPED_TRACE(testing::Message() << "where src_offset = " << src_offset); - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - auto down_index = j+src_offset < logical_warp_size ? index+src_offset : index; - expected[index] = input[down_index]; - } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_down_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, src_offset - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - } - hipFree(device_data); -} - -template -__global__ -void shuffle_index_kernel(T* data, int* src_offsets) -{ - const unsigned int index = (hipBlockIdx_x * hipBlockDim_x) + hipThreadIdx_x; - T value = data[index]; - - // Using mask is not supported in rocPRIM, so we don't test other masks - const unsigned int member_mask = 0xffffffff; - value = hipcub::ShuffleIndex( - value, src_offsets[hipThreadIdx_x/LOGICAL_WARP_THREADS], member_mask - ); - - data[index] = value; -} - -TYPED_TEST(HipcubUtilPtxTests, ShuffleIndex) -{ - using T = typename TestFixture::type; - constexpr unsigned int logical_warp_size = TestFixture::logical_warp_size; - const size_t hardware_warp_size = HIPCUB_WARP_THREADS; - const size_t size = hardware_warp_size; - - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - auto src_offsets = test_utils::get_random_data( - hardware_warp_size/logical_warp_size, 0, std::max(1, logical_warp_size - 1) - ); - - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - int src_index = src_offsets[i]; - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - if(src_index >= int(logical_warp_size) || src_index < 0) src_index = index; - expected[index] = input[src_index + logical_warp_size * i]; - } - } - - // Writing to device memory - T* device_data; - int * device_src_offsets; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - HIP_CHECK( - hipMalloc( - &device_src_offsets, - src_offsets.size() * sizeof(typename decltype(src_offsets)::value_type) - ) - ); - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - device_src_offsets, src_offsets.data(), - src_offsets.size() * sizeof(typename decltype(src_offsets)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_index_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, device_src_offsets - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - - hipFree(device_data); - hipFree(device_src_offsets); -} - -TEST(HipcubUtilPtxTests, ShuffleUpCustomStruct) -{ - using T = custom_notaligned; - constexpr unsigned int hardware_warp_size = HIPCUB_WARP_THREADS; - constexpr unsigned int logical_warp_size = hardware_warp_size; - const size_t size = logical_warp_size; - - // Generate data - std::vector random_data = test_utils::get_random_data(4 * size, -100, 100); - std::vector input(size); - std::vector output(input.size()); - for(size_t i = 0; i < 4 * input.size(); i+=4) - { - input[i/4].i = random_data[i]; - input[i/4].d = random_data[i+1]; - input[i/4].f = random_data[i+2]; - input[i/4].u = random_data[i+3]; - } - - auto src_offsets = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) - ); - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(auto src_offset : src_offsets) - { - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - auto up_index = j > src_offset-1 ? index-src_offset : index; - expected[index] = input[up_index]; - } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, src_offset - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - } - hipFree(device_data); -} - -TEST(HipcubUtilPtxTests, ShuffleUpCustomAlignedStruct) -{ - using T = custom_16aligned; - constexpr unsigned int hardware_warp_size = HIPCUB_WARP_THREADS; - constexpr unsigned int logical_warp_size = hardware_warp_size; - const size_t size = logical_warp_size; - - // Generate data - std::vector random_data = test_utils::get_random_data(3 * size, -100, 100); - std::vector input(size); - std::vector output(input.size()); - for(size_t i = 0; i < 3 * input.size(); i+=3) - { - input[i/3].i = random_data[i]; - input[i/3].u = random_data[i+1]; - input[i/3].f = random_data[i+2]; - } - - auto src_offsets = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) - ); - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(auto src_offset : src_offsets) - { - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - auto up_index = j > src_offset-1 ? index-src_offset : index; - expected[index] = input[up_index]; - } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, src_offset - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - } - hipFree(device_data); -} - -__global__ -void warp_id_kernel(unsigned int* output) -{ - const unsigned int index = (hipBlockIdx_x * hipBlockDim_x) + hipThreadIdx_x; - output[index] = hipcub::WarpId(); -} - -TEST(HipcubUtilPtxTests, WarpId) -{ - constexpr unsigned int hardware_warp_size = HIPCUB_WARP_THREADS; - const size_t block_size = 4 * hardware_warp_size; - const size_t size = 16 * block_size; - - std::vector output(size); - unsigned int* device_output; - HIP_CHECK( - hipMalloc( - &device_output, - output.size() * sizeof(unsigned int) - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - warp_id_kernel, - dim3(size/block_size), dim3(block_size), 0, 0, - device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - - std::vector warp_ids(block_size/hardware_warp_size, 0); - for(size_t i = 0; i < output.size()/hardware_warp_size; i++) - { - auto prev = output[i * hardware_warp_size]; - for(size_t j = 0; j < hardware_warp_size; j++) - { - auto index = j + i * hardware_warp_size; - // less than number of warps in thread block - ASSERT_LT(output[index], block_size/hardware_warp_size); - ASSERT_GE(output[index], 0U); // > 0 - ASSERT_EQ(output[index], prev); // all in warp_ids in warp are the same - } - warp_ids[prev]++; - } - // Check if each warp_id appears the same number of times. - for(auto warp_id_no : warp_ids) - { - ASSERT_EQ(warp_id_no, size/block_size); - } -} diff --git a/test/hipcub/test_hipcub_warp_reduce.cpp b/test/hipcub/test_hipcub_warp_reduce.cpp deleted file mode 100644 index 281d525da..000000000 --- a/test/hipcub/test_hipcub_warp_reduce.cpp +++ /dev/null @@ -1,591 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -template< - class T, - unsigned int WarpSize -> -struct params -{ - using type = T; - static constexpr unsigned int warp_size = WarpSize; -}; - -template -class HipcubWarpReduceTests : public ::testing::Test { -public: - using type = typename Params::type; - static constexpr unsigned int warp_size = Params::warp_size; -}; - -typedef ::testing::Types< - // shuffle based reduce - // Integer - params, - params, - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - #endif - // Float - params, - params, - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - #endif - - // shared memory reduce - // Integer - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - params, - #endif - // Float - params, - params, - params - #ifdef HIPCUB_ROCPRIM_API - ,params, - params - #endif -> HipcubWarpReduceTestParams; - -TYPED_TEST_CASE(HipcubWarpReduceTests, HipcubWarpReduceTestParams); - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_reduce_kernel(T* device_input, T* device_output) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = device_input[index]; - - using wreduce_t = hipcub::WarpReduce; - __shared__ typename wreduce_t::TempStorage storage[warps_no]; - auto reduce_op = hipcub::Sum(); - value = wreduce_t(storage[warp_id]).Reduce(value, reduce_op); - - if (hipThreadIdx_x % LogicalWarpSize == 0) - { - device_output[index / LogicalWarpSize] = value; - } -} - -TYPED_TEST(HipcubWarpReduceTests, Reduce) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size - // is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size / logical_warp_size, 0); - std::vector expected(output.size(), 1); - - // Calculate expected results on host - for(size_t i = 0; i < output.size(); i++) - { - T value = 0; - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - value += input[idx]; - } - expected[i] = value; - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - auto diff = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_reduce_valid_kernel(T* device_input, T* device_output, const int valid) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = device_input[index]; - - using wreduce_t = hipcub::WarpReduce; - __shared__ typename wreduce_t::TempStorage storage[warps_no]; - auto reduce_op = hipcub::Sum(); - value = wreduce_t(storage[warp_id]).Reduce(value, reduce_op, valid); - - if (hipThreadIdx_x % LogicalWarpSize == 0) - { - device_output[index / LogicalWarpSize] = value; - } -} - -TYPED_TEST(HipcubWarpReduceTests, ReduceValid) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size - // is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - const int valid = logical_warp_size - 1; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size / logical_warp_size, 0); - std::vector expected(output.size(), 1); - - // Calculate expected results on host - for(size_t i = 0; i < output.size(); i++) - { - T value = 0; - for(size_t j = 0; j < valid; j++) - { - auto idx = i * logical_warp_size + j; - value += input[idx]; - } - expected[i] = value; - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_valid_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, valid - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - for(size_t i = 0; i < output.size(); i++) - { - auto diff = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class T, - class Flag, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void head_segmented_warp_reduce_kernel(T* input, Flag* flags, T* output) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = input[index]; - auto flag = flags[index]; - - using wreduce_t = hipcub::WarpReduce; - __shared__ typename wreduce_t::TempStorage storage[warps_no]; - value = wreduce_t(storage[warp_id]).HeadSegmentedSum(value, flag); - - output[index] = value; -} - -TYPED_TEST(HipcubWarpReduceTests, HeadSegmentedReduceSum) -{ - using T = typename TestFixture::type; - using flag_type = unsigned char; - // logical warp side for warp primitive, execution warp size - // is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - #ifdef HIPCUB_CUB_API - // Bug in CUB - auto x = logical_warp_size; - if(x%2 != 0) - { - return; - } - #endif - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); // used for input - std::vector flags = test_utils::get_random_data01(size, 0.25f); - for(size_t i = 0; i < flags.size(); i+= logical_warp_size) - { - flags[i] = 1; - } - std::vector output(input.size()); - - T* device_input; - flag_type* device_flags; - T* device_output; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - device_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(output.size()); - size_t segment_head_index = 0; - T reduction = input[0]; - for(size_t i = 0; i < output.size(); i++) - { - if(i%logical_warp_size == 0 || flags[i]) - { - expected[segment_head_index] = reduction; - segment_head_index = i; - reduction = input[i]; - } - else - { - reduction = reduction + input[i]; - } - } - expected[segment_head_index] = reduction; - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel< - T, flag_type, block_size, logical_warp_size - >), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_flags, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < output.size(); i++) - { - if(flags[i]) - { - auto diff = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_flags)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class T, - class Flag, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void tail_segmented_warp_reduce_kernel(T* input, Flag* flags, T* output) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = input[index]; - auto flag = flags[index]; - - using wreduce_t = hipcub::WarpReduce; - __shared__ typename wreduce_t::TempStorage storage[warps_no]; - auto reduce_op = hipcub::Sum(); - value = wreduce_t(storage[warp_id]).TailSegmentedReduce(value, flag, reduce_op); - - output[index] = value; -} - -TYPED_TEST(HipcubWarpReduceTests, TailSegmentedReduceSum) -{ - using T = typename TestFixture::type; - using flag_type = unsigned char; - // logical warp side for warp primitive, execution warp size - // is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - #ifdef HIPCUB_CUB_API - // Bug in CUB - auto x = logical_warp_size; - if(x%2 != 0) - { - return; - } - #endif - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); // used for input - std::vector flags = test_utils::get_random_data01(size, 0.25f); - for(size_t i = logical_warp_size - 1; i < flags.size(); i+= logical_warp_size) - { - flags[i] = 1; - } - std::vector output(input.size()); - - T* device_input; - flag_type* device_flags; - T* device_output; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - device_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(output.size()); - std::vector segment_indexes; - size_t segment_index = 0; - T reduction; - for(size_t i = 0; i < output.size(); i++) - { - // single value segments - if(flags[i]) - { - expected[i] = input[i]; - segment_indexes.push_back(i); - } - else - { - segment_index = i; - reduction = input[i]; - auto next = i + 1; - while(next < output.size() && !flags[next]) - { - reduction = reduction + input[next]; - i++; - next++; - } - i++; - expected[segment_index] = reduction + input[i]; - segment_indexes.push_back(segment_index); - } - } - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel< - T, flag_type, block_size, logical_warp_size - >), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_flags, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < segment_indexes.size(); i++) - { - auto index = segment_indexes[i]; - auto diff = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[index], expected[index], diff); - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_flags)); - HIP_CHECK(hipFree(device_output)); -} diff --git a/test/hipcub/test_hipcub_warp_scan.cpp b/test/hipcub/test_hipcub_warp_scan.cpp deleted file mode 100644 index 06f203484..000000000 --- a/test/hipcub/test_hipcub_warp_scan.cpp +++ /dev/null @@ -1,880 +0,0 @@ -// MIT License -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include -#include - -// Google Test -#include -// hipCUB API -#include - -#include "test_utils.hpp" - -#define HIP_CHECK(error) ASSERT_EQ(error, hipSuccess) - -// Params for tests -template< - class T, - unsigned int WarpSize -> -struct params -{ - using type = T; - static constexpr unsigned int warp_size = WarpSize; -}; - -// --------------------------------------------------------- -// Test for scan ops taking single input value -// --------------------------------------------------------- - -template -class HipcubWarpScanTests : public ::testing::Test { -public: - using type = typename Params::type; - static constexpr unsigned int warp_size = Params::warp_size; -}; - -typedef ::testing::Types< - - // shuffle based scan - // Integer - params, - params, - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - #endif - // Float - params, - params, - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - #endif - - // shared memory scan - // Integer - params, - params, - params, - #ifdef HIPCUB_ROCPRIM_API - params, - params, - #endif - // Float - params, - params, - params - #ifdef HIPCUB_ROCPRIM_API - ,params, - params - #endif - -> HipcubWarpScanTestParams; - -TYPED_TEST_CASE(HipcubWarpScanTests, HipcubWarpScanTestParams); - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_inclusive_scan_kernel(T* device_input, T* device_output) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = device_input[index]; - - using wscan_t = hipcub::WarpScan; - __shared__ typename wscan_t::TempStorage storage[warps_no]; - auto scan_op = hipcub::Sum(); - wscan_t(storage[warp_id]).InclusiveScan(value, value, scan_op); - - device_output[index] = value; -} - -TYPED_TEST(HipcubWarpScanTests, InclusiveScan) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size - // is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size); - std::vector expected(output.size(), 0); - - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx] + expected[j > 0 ? idx-1 : idx]; - } - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - ASSERT_NEAR(output[i], expected[i], tolerance); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_inclusive_scan_reduce_kernel( - T* device_input, - T* device_output, - T* device_output_reductions) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + ( hipBlockIdx_x * BlockSize ); - - T value = device_input[index]; - T reduction = value; - - using wscan_t = hipcub::WarpScan; - __shared__ typename wscan_t::TempStorage storage[warps_no]; - if(warp_id%2 == 0) - { - auto scan_op = hipcub::Sum(); - wscan_t(storage[warp_id]).InclusiveScan(value, value, scan_op, reduction); - } - else - { - wscan_t(storage[warp_id]).InclusiveSum(value, value, reduction); - } - - device_output[index] = value; - if((hipThreadIdx_x % LogicalWarpSize) == 0) - { - device_output_reductions[index / LogicalWarpSize] = reduction; - } -} - -TYPED_TEST(HipcubWarpScanTests, InclusiveScanReduce) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size); - std::vector output_reductions(size / logical_warp_size); - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - - // Calculate expected results on host - for(size_t i = 0; i < output.size() / logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx] + expected[j > 0 ? idx-1 : idx]; - } - expected_reductions[i] = expected[(i+1) * logical_warp_size - 1]; - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_output_reductions - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - ASSERT_NEAR(output[i], expected[i], tolerance); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected_reductions[i]), T(0.01f)); - ASSERT_NEAR(output_reductions[i], expected_reductions[i], tolerance); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_exclusive_scan_kernel(T* device_input, T* device_output, T init) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = device_input[index]; - - using wscan_t = hipcub::WarpScan; - __shared__ typename wscan_t::TempStorage storage[warps_no]; - auto scan_op = hipcub::Sum(); - wscan_t(storage[warp_id]).ExclusiveScan(value, value, init, scan_op); - - device_output[index] = value; -} - -TYPED_TEST(HipcubWarpScanTests, ExclusiveScan) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size); - std::vector expected(input.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - expected[i * logical_warp_size] = init; - for(size_t j = 1; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx-1] + expected[idx-1]; - } - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - ASSERT_NEAR(output[i], expected[i], tolerance); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_exclusive_scan_reduce_kernel( - T* device_input, - T* device_output, - T* device_output_reductions, - T init) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T value = device_input[index]; - T reduction = value; - - using wscan_t = hipcub::WarpScan; - __shared__ typename wscan_t::TempStorage storage[warps_no]; - auto scan_op = hipcub::Sum(); - wscan_t(storage[warp_id]).ExclusiveScan(value, value, init, scan_op, reduction); - - device_output[index] = value; - if((hipThreadIdx_x % LogicalWarpSize) == 0) - { - device_output_reductions[index / LogicalWarpSize] = reduction; - } -} - -TYPED_TEST(HipcubWarpScanTests, ExclusiveReduceScan) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(size); - std::vector output_reductions(size / logical_warp_size); - std::vector expected(input.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - expected[i * logical_warp_size] = init; - for(size_t j = 1; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx-1] + expected[idx-1]; - } - - expected_reductions[i] = 0; - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected_reductions[i] += input[idx]; - } - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - ASSERT_EQ(output_reductions[i], expected_reductions[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - ASSERT_NEAR(output[i], expected[i], tolerance); - } - - for(size_t i = 0; i < output_reductions.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected_reductions[i]), T(0.01f)); - ASSERT_NEAR(output_reductions[i], expected_reductions[i], tolerance); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); -} - -template< - class T, - unsigned int BlockSize, - unsigned int LogicalWarpSize -> -__global__ -void warp_scan_kernel( - T* device_input, - T* device_inclusive_output, - T* device_exclusive_output, - T init) -{ - constexpr unsigned int warps_no = BlockSize / LogicalWarpSize; - const unsigned int warp_id = test_utils::logical_warp_id(); - unsigned int index = hipThreadIdx_x + (hipBlockIdx_x * hipBlockDim_x); - - T input = device_input[index]; - T inclusive_output, exclusive_output; - - using wscan_t = hipcub::WarpScan; - __shared__ typename wscan_t::TempStorage storage[warps_no]; - auto scan_op = hipcub::Sum(); - wscan_t(storage[warp_id]).Scan(input, inclusive_output, exclusive_output, init, scan_op); - - device_inclusive_output[index] = inclusive_output; - device_exclusive_output[index] = exclusive_output; -} - -TYPED_TEST(HipcubWarpScanTests, Scan) -{ - using T = typename TestFixture::type; - // logical warp side for warp primitive, execution warp size is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output_inclusive(size); - std::vector output_exclusive(size); - std::vector expected_inclusive(output_inclusive.size(), 0); - std::vector expected_exclusive(output_exclusive.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - expected_exclusive[i * logical_warp_size] = init; - expected_inclusive[i * logical_warp_size] = init; - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected_inclusive[idx] = input[idx] + expected_inclusive[j > 0 ? idx-1 : idx]; - if(j > 0) - { - expected_exclusive[idx] = input[idx-1] + expected_exclusive[idx-1]; - } - } - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_inclusive_output; - HIP_CHECK( - hipMalloc( - &device_inclusive_output, - output_inclusive.size() * sizeof(typename decltype(output_inclusive)::value_type) - ) - ); - T* device_exclusive_output; - HIP_CHECK( - hipMalloc( - &device_exclusive_output, - output_exclusive.size() * sizeof(typename decltype(output_exclusive)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_inclusive_output, device_exclusive_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output_inclusive.data(), device_inclusive_output, - output_inclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_exclusive.data(), device_exclusive_output, - output_exclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output_inclusive.size(); i++) - { - ASSERT_EQ(output_inclusive[i], expected_inclusive[i]); - ASSERT_EQ(output_exclusive[i], expected_exclusive[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output_inclusive.size(); i++) - { - auto tolerance = std::max(std::abs(0.1f * expected_inclusive[i]), T(0.01f)); - ASSERT_NEAR(output_inclusive[i], expected_inclusive[i], tolerance); - - tolerance = std::max(std::abs(0.1f * expected_exclusive[i]), T(0.01f)); - ASSERT_NEAR(output_exclusive[i], expected_exclusive[i], tolerance); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_inclusive_output)); - HIP_CHECK(hipFree(device_exclusive_output)); -} - -TYPED_TEST(HipcubWarpScanTests, InclusiveScanCustomType) -{ - using base_type = typename TestFixture::type; - using T = test_utils::custom_test_type; - // logical warp side for warp primitive, execution warp size is always test_utils::warp_size() - constexpr size_t logical_warp_size = TestFixture::warp_size; - constexpr size_t block_size = - test_utils::is_power_of_two(logical_warp_size) - ? test_utils::max(test_utils::warp_size(), logical_warp_size * 4) - : (test_utils::warp_size()/logical_warp_size) * logical_warp_size; - unsigned int grid_size = 4; - const size_t size = block_size * grid_size; - - // Given warp size not supported - if(logical_warp_size > test_utils::warp_size()) - { - return; - } - - // Generate data - std::vector input(size); - std::vector output(size); - std::vector expected(output.size(), 0); - - // Initializing input data - { - auto random_values = - test_utils::get_random_data(2 * input.size(), 0, 100); - for(size_t i = 0; i < input.size(); i++) - { - input[i].x = random_values[i]; - input[i].y = random_values[i + input.size()]; - } - } - - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx] + expected[j > 0 ? idx-1 : idx]; - } - } - - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - if (std::is_integral::value) - { - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]); - } - } - else if (std::is_floating_point::value) - { - for(size_t i = 0; i < output.size(); i++) - { - auto tolerance_x = std::max(std::abs(0.1f * expected[i].x), base_type(0.01f)); - auto tolerance_y = std::max(std::abs(0.1f * expected[i].y), base_type(0.01f)); - ASSERT_NEAR(output[i].x, expected[i].x, tolerance_x); - ASSERT_NEAR(output[i].y, expected[i].y, tolerance_y); - } - } - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); -} diff --git a/test/hipcub/test_utils.hpp b/test/hipcub/test_utils.hpp deleted file mode 100644 index b93a2161e..000000000 --- a/test/hipcub/test_utils.hpp +++ /dev/null @@ -1,500 +0,0 @@ -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#ifndef ROCPRIM_TEST_TEST_UTILS_HPP_ -#define ROCPRIM_TEST_TEST_UTILS_HPP_ - -#include -#include -#include -#include -#include - -// hipCUB -#include - -namespace test_utils -{ - -template -inline auto get_random_data(size_t size, T min, T max) - -> typename std::enable_if::value, std::vector>::type -{ - std::random_device rd; - std::default_random_engine gen(rd()); - std::uniform_int_distribution distribution(min, max); - std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); - return data; -} - -template -inline auto get_random_data(size_t size, T min, T max) - -> typename std::enable_if::value, std::vector>::type -{ - std::random_device rd; - std::default_random_engine gen(rd()); - std::uniform_real_distribution distribution(min, max); - std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); - return data; -} - -template -inline std::vector get_random_data01(size_t size, float p) -{ - const size_t max_random_size = 1024 * 1024; - std::random_device rd; - std::default_random_engine gen(rd()); - std::bernoulli_distribution distribution(p); - std::vector data(size); - std::generate( - data.begin(), data.begin() + std::min(size, max_random_size), - [&]() { return distribution(gen); } - ); - for(size_t i = max_random_size; i < size; i += max_random_size) - { - std::copy_n(data.begin(), std::min(size - i, max_random_size), data.begin() + i); - } - return data; -} - -template -inline auto get_random_value(T min, T max) - -> typename std::enable_if::value, T>::type -{ - return get_random_data(1, min, max)[0]; -} - -// Can't use std::prefix_sum for inclusive/exclusive scan, because -// it does not handle short[] -> int(int a, int b) { a + b; } -> int[] -// they way we expect. That's because sum in std::prefix_sum's implementation -// is of type typename std::iterator_traits::value_type (short) -template -OutputIt host_inclusive_scan(InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) -{ - using input_type = typename std::iterator_traits::value_type; - using output_type = typename std::iterator_traits::value_type; - using result_type = - typename std::conditional< - std::is_void::value, input_type, output_type - >::type; - - if (first == last) return d_first; - - result_type sum = *first; - *d_first = sum; - - while (++first != last) { - sum = op(sum, static_cast(*first)); - *++d_first = sum; - } - return ++d_first; -} - -template -OutputIt host_exclusive_scan(InputIt first, InputIt last, - T initial_value, OutputIt d_first, - BinaryOperation op) -{ - using input_type = typename std::iterator_traits::value_type; - using output_type = typename std::iterator_traits::value_type; - using result_type = - typename std::conditional< - std::is_void::value, input_type, output_type - >::type; - - if (first == last) return d_first; - - result_type sum = initial_value; - *d_first = initial_value; - - while ((first+1) != last) - { - sum = op(sum, static_cast(*first)); - *++d_first = sum; - first++; - } - return ++d_first; -} - -template -OutputIt host_exclusive_scan_by_key(InputIt first, InputIt last, KeyIt k_first, - T initial_value, OutputIt d_first, - BinaryOperation op, KeyCompare key_compare_op) -{ - using input_type = typename std::iterator_traits::value_type; - using output_type = typename std::iterator_traits::value_type; - using result_type = - typename std::conditional< - std::is_void::value, input_type, output_type - >::type; - - if (first == last) return d_first; - - result_type sum = initial_value; - *d_first = initial_value; - - while ((first+1) != last) - { - if(key_compare_op(*k_first, *++k_first)) - { - sum = op(sum, static_cast(*first)); - } - else - { - sum = initial_value; - } - *++d_first = sum; - first++; - } - return ++d_first; -} - -HIPCUB_HOST_DEVICE inline -constexpr unsigned int warp_size() -{ - return HIPCUB_WARP_THREADS; -} - -template -HIPCUB_HOST_DEVICE inline -constexpr T max(const T& a, const T& b) -{ - return a < b ? b : a; -} - -template -HIPCUB_HOST_DEVICE inline -constexpr T min(const T& a, const T& b) -{ - return a < b ? a : b; -} - -template -HIPCUB_HOST_DEVICE inline -constexpr bool is_power_of_two(const T x) -{ - static_assert(std::is_integral::value, "T must be integer type"); - return (x > 0) && ((x & (x - 1)) == 0); -} - -template -HIPCUB_HOST_DEVICE inline -constexpr T next_power_of_two(const T x, const T acc = 1) -{ - static_assert(std::is_unsigned::value, "T must be unsigned type"); - return acc >= x ? acc : next_power_of_two(x, 2 * acc); -} - -// Return thread id in a "logical warp", which can be smaller than a hardware warp size. -template -HIPCUB_DEVICE inline -auto logical_lane_id() - -> typename std::enable_if::type -{ - return hipcub::LaneId() & (LogicalWarpSize-1); // same as land_id()%WarpSize -} - -template -HIPCUB_DEVICE inline -auto logical_lane_id() - -> typename std::enable_if::type -{ - return hipcub::LaneId()%LogicalWarpSize; -} - -template<> -HIPCUB_DEVICE inline -unsigned int logical_lane_id() -{ - return hipcub::LaneId(); -} - -// Return id of "logical warp" in a block -template -HIPCUB_DEVICE inline -unsigned int logical_warp_id() -{ - return hipcub::RowMajorTid(1, 1, 1)/LogicalWarpSize; -} - -template<> -HIPCUB_DEVICE inline -unsigned int logical_warp_id() -{ - return hipcub::WarpId(); -} - -inline -size_t get_max_block_size() -{ - hipDeviceProp_t device_properties; - hipError_t error = hipGetDeviceProperties(&device_properties, 0); - if(error != hipSuccess) - { - std::cout << "HIP error: " << error - << " file: " << __FILE__ - << " line: " << __LINE__ - << std::endl; - std::exit(error); - } - return device_properties.maxThreadsPerBlock; -} - -// Select the minimal warp size for block of size block_size, it's -// useful for blocks smaller than maximal warp size. -template -HIPCUB_HOST_DEVICE inline -constexpr T get_min_warp_size(const T block_size, const T max_warp_size) -{ - static_assert(std::is_unsigned::value, "T must be unsigned type"); - return block_size >= max_warp_size ? max_warp_size : next_power_of_two(block_size); -} - -template -struct custom_test_type -{ - using value_type = T; - - T x; - T y; - - HIPCUB_HOST_DEVICE inline - constexpr custom_test_type() {} - - HIPCUB_HOST_DEVICE inline - constexpr custom_test_type(T x, T y) : x(x), y(y) {} - - HIPCUB_HOST_DEVICE inline - constexpr custom_test_type(T xy) : x(xy), y(xy) {} - - template - HIPCUB_HOST_DEVICE inline - custom_test_type(const custom_test_type& other) - { - x = other.x; - y = other.y; - } - - #ifndef HIPCUB_CUB_API - HIPCUB_HOST_DEVICE inline - ~custom_test_type() = default; - #endif - - HIPCUB_HOST_DEVICE inline - custom_test_type& operator=(const custom_test_type& other) - { - x = other.x; - y = other.y; - return *this; - } - - HIPCUB_HOST_DEVICE inline - custom_test_type operator+(const custom_test_type& other) const - { - return custom_test_type(x + other.x, y + other.y); - } - - HIPCUB_HOST_DEVICE inline - custom_test_type operator-(const custom_test_type& other) const - { - return custom_test_type(x - other.x, y - other.y); - } - - HIPCUB_HOST_DEVICE inline - bool operator<(const custom_test_type& other) const - { - return (x < other.x || (x == other.x && y < other.y)); - } - - HIPCUB_HOST_DEVICE inline - bool operator>(const custom_test_type& other) const - { - return (x > other.x || (x == other.x && y > other.y)); - } - - HIPCUB_HOST_DEVICE inline - bool operator==(const custom_test_type& other) const - { - return (x == other.x && y == other.y); - } - - HIPCUB_HOST_DEVICE inline - bool operator!=(const custom_test_type& other) const - { - return !(*this == other); - } -}; - -template -struct is_custom_test_type : std::false_type -{ -}; - -template -struct is_custom_test_type> : std::true_type -{ -}; - -template -inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max) - -> typename std::enable_if< - is_custom_test_type::value && std::is_integral::value, - std::vector - >::type -{ - std::random_device rd; - std::default_random_engine gen(rd()); - std::uniform_int_distribution distribution(min, max); - std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); - return data; -} - -template -inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max) - -> typename std::enable_if< - is_custom_test_type::value && std::is_floating_point::value, - std::vector - >::type -{ - std::random_device rd; - std::default_random_engine gen(rd()); - std::uniform_real_distribution distribution(min, max); - std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); - return data; -} - -template -auto assert_near(const std::vector& result, const std::vector& expected, const float percent) - -> typename std::enable_if::value && std::is_arithmetic::value>::type -{ - ASSERT_EQ(result.size(), expected.size()); - for(size_t i = 0; i < result.size(); i++) - { - auto diff = std::max(std::abs(percent * expected[i]), T(percent)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(result[i], expected[i], diff) << "where index = " << i; - } -} - -template -auto assert_near(const T& result, const T& expected, const float percent) - -> typename std::enable_if::value && std::is_arithmetic::value>::type -{ - auto diff = std::max(std::abs(percent * expected), T(percent)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(result, expected, diff); -} - - -template -auto assert_near(const T& result, const T& expected, const float percent) - -> typename std::enable_if::value>::type -{ - using value_type = typename T::value_type; - auto diff1 = std::max(std::abs(percent * expected.x), value_type(percent)); - auto diff2 = std::max(std::abs(percent * expected.y), value_type(percent)); - if(std::is_integral::value) - { - diff1 = 0; - diff2 = 0; - } - ASSERT_NEAR(result.x, expected.x, diff1); - ASSERT_NEAR(result.y, expected.y, diff2); -} - -template -auto assert_near(const std::vector& result, const std::vector& expected, const float percent) - -> typename std::enable_if::value>::type -{ - using value_type = typename T::value_type; - ASSERT_EQ(result.size(), expected.size()); - for(size_t i = 0; i < result.size(); i++) - { - auto diff1 = std::max(std::abs(percent * expected[i].x), value_type(percent)); - auto diff2 = std::max(std::abs(percent * expected[i].y), value_type(percent)); - if(std::is_integral::value) - { - diff1 = 0; - diff2 = 0; - } - ASSERT_NEAR(result[i].x, expected[i].x, diff1) << "where index = " << i; - ASSERT_NEAR(result[i].y, expected[i].y, diff2) << "where index = " << i; - } -} - -template -auto assert_near(const std::vector& result, const std::vector& expected, const float) - -> typename std::enable_if::value && !std::is_arithmetic::value>::type -{ - ASSERT_EQ(result.size(), expected.size()); - for(size_t i = 0; i < result.size(); i++) - { - ASSERT_EQ(result[i], expected[i]) << "where index = " << i; - } -} - -} // end test_util namespace - -// Need for hipcub::DeviceReduce::Min/Max etc. -namespace std -{ - template<> - class numeric_limits> - { - using T = typename test_utils::custom_test_type; - - public: - - static constexpr inline T max() - { - return std::numeric_limits::max(); - } - - static constexpr inline T lowest() - { - return std::numeric_limits::lowest(); - } - }; - - template<> - class numeric_limits> - { - using T = typename test_utils::custom_test_type; - - public: - - static constexpr inline T max() - { - return std::numeric_limits::max(); - } - - static constexpr inline T lowest() - { - return std::numeric_limits::lowest(); - } - }; -} - -#endif // ROCPRIM_TEST_HIPCUB_TEST_UTILS_HPP_