From 3c432c78e9859ef30187b8f2ad7fc052ffaf7727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ty=C3=A1s=20Aradi?= Date: Wed, 21 May 2025 20:41:18 +0000 Subject: [PATCH 1/2] Fix "Add thrust::inclusive_scan with init_value support" --- thrust/system/cuda/detail/scan.h | 3 ++- thrust/system/tbb/detail/scan.inl | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/thrust/system/cuda/detail/scan.h b/thrust/system/cuda/detail/scan.h index 5786a3a69..d20cb1294 100644 --- a/thrust/system/cuda/detail/scan.h +++ b/thrust/system/cuda/detail/scan.h @@ -235,7 +235,8 @@ _CCCL_HOST_DEVICE OutputIt inclusive_scan_n( { THRUST_CDP_DISPATCH( (result = thrust::cuda_cub::detail::inclusive_scan_n_impl(policy, first, num_items, result, init, scan_op);), - (result = thrust::inclusive_scan(cvt_to_seq(derived_cast(policy)), first, first + num_items, result, scan_op);)); + (result = + thrust::inclusive_scan(cvt_to_seq(derived_cast(policy)), first, first + num_items, result, init, scan_op);)); return result; } diff --git a/thrust/system/tbb/detail/scan.inl b/thrust/system/tbb/detail/scan.inl index 924c52b28..b5d713112 100644 --- a/thrust/system/tbb/detail/scan.inl +++ b/thrust/system/tbb/detail/scan.inl @@ -26,7 +26,13 @@ #include #include -#include +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# include +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP +# include +#endif + +#include #include #include @@ -235,8 +241,15 @@ OutputIterator inclusive_scan( using namespace thrust::detail; // Use the input iterator's value type and the initial value type per wg21.link/p2322 - using ValueType = typename ::cuda::std:: - __accumulator_t::value_type, InitialValueType>; + #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + using ValueType = typename ::cuda::std:: + __accumulator_t::value_type, InitialValueType>; + #elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP + using ValueType = ::rocprim:: + accumulator_t::value_type, InitialValueType>; + #else + using ValueType = typename std::iterator_traits::value_type; + #endif using Size = typename thrust::iterator_difference::type; Size n = thrust::distance(first, last); From 47eac4f303e1c5cb96fd6a312f542de9fb4a237c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ty=C3=A1s=20Aradi?= Date: Wed, 28 May 2025 07:19:29 +0000 Subject: [PATCH 2/2] Resolve "General changes CCCL 2.7.0" --- .gitlab-ci.yml | 3 +- CHANGELOG.md | 2 + CMakeLists.txt | 4 + examples/include/host_device.h | 4 +- test/test_constant_iterator.cpp | 4 +- testing/CMakeLists.txt | 1 + testing/allocator.cu | 6 +- testing/async_sort.cu | 4 +- testing/cuda/scan.cu | 31 + testing/cuda/sort.cu | 173 ++++ .../functional_placeholders_miscellaneous.cu | 41 + testing/inner_product.cu | 2 +- testing/merge_by_key.cu | 66 ++ testing/omp/reduce_intervals.cu | 2 +- testing/scan.cu | 241 ++--- testing/sort.cu | 19 +- testing/tabulate_output_iterator.cu | 167 +++ testing/transform_iterator.cu | 102 ++ testing/type_traits.cu | 78 ++ testing/unique.cu | 42 + testing/unittest/assertions.h | 24 +- testing/unittest/meta.h | 15 + testing/vector_allocators.cu | 4 +- thrust/cmake/thrust-config-version.cmake | 2 +- thrust/cmake/thrust-config.cmake | 10 + thrust/complex.h | 115 +-- thrust/detail/allocator/allocator_traits.h | 4 +- thrust/detail/allocator/allocator_traits.inl | 10 +- thrust/detail/allocator/destroy_range.h | 2 +- thrust/detail/allocator/destroy_range.inl | 12 +- thrust/detail/allocator/malloc_allocator.h | 2 +- thrust/detail/allocator/malloc_allocator.inl | 2 +- thrust/detail/allocator/no_throw_allocator.h | 2 +- thrust/detail/allocator/temporary_allocator.h | 2 +- .../detail/allocator/temporary_allocator.inl | 4 +- ...truct_range.h => value_initialize_range.h} | 4 +- ...t_range.inl => value_initialize_range.inl} | 8 +- thrust/detail/complex/catrig.h | 7 - thrust/detail/complex/catrigf.h | 6 - thrust/detail/config/compiler.h | 42 + thrust/detail/config/compiler_fence.h | 2 +- thrust/detail/config/cpp_dialect.h | 20 +- thrust/detail/contiguous_storage.h | 24 +- thrust/detail/contiguous_storage.inl | 26 +- thrust/detail/function.h | 111 +- thrust/detail/functional/actor.h | 223 ++-- thrust/detail/functional/actor.inl | 102 -- thrust/detail/functional/argument.h | 69 -- thrust/detail/functional/composite.h | 115 --- thrust/detail/functional/operators.h | 365 ++++++- .../operators/arithmetic_operators.h | 256 ----- .../operators/assignment_operator.h | 69 -- .../functional/operators/bitwise_operators.h | 188 ---- .../operators/compound_assignment_operators.h | 315 ------ .../functional/operators/logical_operators.h | 87 -- .../functional/operators/operator_adaptors.h | 120 --- .../operators/relational_operators.h | 170 ---- thrust/detail/functional/placeholder.h | 38 - thrust/detail/functional/value.h | 73 -- thrust/detail/integer_math.h | 13 + thrust/detail/internal_functional.h | 19 - thrust/detail/malloc_and_free.h | 4 +- thrust/detail/memory_algorithms.h | 12 +- thrust/detail/range/head_flags.h | 147 +-- thrust/detail/reference.h | 2 +- thrust/detail/temporary_array.inl | 2 +- .../{function_traits.h => is_commutative.h} | 51 +- thrust/detail/type_traits/pointer_traits.h | 43 +- .../result_of_adaptable_function.h | 11 +- thrust/detail/vector_base.h | 23 +- thrust/detail/vector_base.inl | 51 +- thrust/device_malloc_allocator.h | 15 +- thrust/device_new_allocator.h | 2 +- thrust/device_reference.h | 4 +- thrust/device_vector.h | 2 +- thrust/functional.h | 422 +++----- thrust/host_vector.h | 2 +- thrust/iterator/detail/iterator_traits.inl | 18 +- .../detail/tabulate_output_iterator.inl | 61 ++ thrust/iterator/detail/transform_iterator.inl | 31 +- thrust/iterator/iterator_traits.h | 2 - thrust/iterator/tabulate_output_iterator.h | 109 ++ thrust/iterator/transform_iterator.h | 5 +- thrust/mr/allocator.h | 2 +- thrust/mr/memory_resource.h | 4 +- thrust/optional.h | 13 +- thrust/pair.h | 7 + thrust/partition.h | 20 +- thrust/random/detail/normal_distribution.inl | 32 +- .../random/detail/normal_distribution_base.h | 3 +- thrust/remove.h | 8 +- thrust/scan.h | 106 +- thrust/scatter.h | 23 +- thrust/system/cpp/pointer.h | 5 +- .../system/cuda/detail/adjacent_difference.h | 2 +- thrust/system/cuda/detail/assign_value.h | 2 +- thrust/system/cuda/detail/async/copy.h | 13 +- .../system/cuda/detail/async/customization.h | 4 +- .../system/cuda/detail/async/exclusive_scan.h | 6 +- thrust/system/cuda/detail/async/for_each.h | 7 +- .../system/cuda/detail/async/inclusive_scan.h | 6 +- thrust/system/cuda/detail/async/reduce.h | 4 +- thrust/system/cuda/detail/async/sort.h | 7 +- thrust/system/cuda/detail/async/transform.h | 8 +- thrust/system/cuda/detail/copy.h | 2 +- thrust/system/cuda/detail/copy_if.h | 2 +- .../system/cuda/detail/core/agent_launcher.h | 2 +- thrust/system/cuda/detail/core/util.h | 4 +- thrust/system/cuda/detail/count.h | 2 +- thrust/system/cuda/detail/dispatch.h | 246 +++-- thrust/system/cuda/detail/equal.h | 2 +- thrust/system/cuda/detail/extrema.h | 2 +- thrust/system/cuda/detail/fill.h | 2 +- thrust/system/cuda/detail/find.h | 2 +- thrust/system/cuda/detail/for_each.h | 4 +- thrust/system/cuda/detail/gather.h | 2 +- thrust/system/cuda/detail/generate.h | 2 +- thrust/system/cuda/detail/get_value.h | 2 +- thrust/system/cuda/detail/inner_product.h | 2 +- .../cuda/detail/internal/copy_cross_system.h | 2 +- .../detail/internal/copy_device_to_device.h | 2 +- thrust/system/cuda/detail/iter_swap.h | 2 +- thrust/system/cuda/detail/merge.h | 957 +++--------------- thrust/system/cuda/detail/mismatch.h | 2 +- thrust/system/cuda/detail/parallel_for.h | 2 +- thrust/system/cuda/detail/partition.h | 2 +- .../system/cuda/detail/per_device_resource.h | 2 +- thrust/system/cuda/detail/reduce.h | 2 +- thrust/system/cuda/detail/reduce_by_key.h | 2 +- thrust/system/cuda/detail/remove.h | 2 +- thrust/system/cuda/detail/replace.h | 2 +- thrust/system/cuda/detail/reverse.h | 2 +- thrust/system/cuda/detail/scan.h | 33 +- thrust/system/cuda/detail/scan_by_key.h | 2 +- thrust/system/cuda/detail/scatter.h | 2 +- thrust/system/cuda/detail/set_operations.h | 2 +- thrust/system/cuda/detail/sort.h | 47 +- thrust/system/cuda/detail/swap_ranges.h | 2 +- thrust/system/cuda/detail/tabulate.h | 2 +- thrust/system/cuda/detail/transform.h | 2 +- thrust/system/cuda/detail/transform_reduce.h | 2 +- thrust/system/cuda/detail/transform_scan.h | 2 +- .../system/cuda/detail/uninitialized_copy.h | 2 +- .../system/cuda/detail/uninitialized_fill.h | 2 +- thrust/system/cuda/detail/unique.h | 2 +- thrust/system/cuda/detail/unique_by_key.h | 2 +- .../system/detail/generic/binary_search.inl | 2 +- .../system/detail/generic/inner_product.inl | 19 +- .../system/detail/generic/reduce_by_key.inl | 1 - .../detail/generic/scalar/binary_search.inl | 6 +- .../system/detail/generic/transform_scan.inl | 1 - .../system/detail/sequential/binary_search.h | 6 +- thrust/system/detail/sequential/copy_if.h | 2 +- thrust/system/detail/sequential/extrema.h | 6 +- thrust/system/detail/sequential/find.h | 2 +- thrust/system/detail/sequential/for_each.h | 4 +- .../system/detail/sequential/general_copy.h | 4 +- .../system/detail/sequential/insertion_sort.h | 4 +- thrust/system/detail/sequential/merge.inl | 4 +- thrust/system/detail/sequential/partition.h | 18 +- thrust/system/detail/sequential/reduce.h | 2 +- .../system/detail/sequential/reduce_by_key.h | 2 +- thrust/system/detail/sequential/remove.h | 8 +- thrust/system/detail/sequential/scan.h | 9 +- thrust/system/detail/sequential/scan_by_key.h | 6 +- .../system/detail/sequential/set_operations.h | 8 +- .../system/detail/sequential/unique_by_key.h | 2 +- thrust/system/hip/detail/binary_search.h | 6 +- thrust/system/hip/detail/for_each.h | 2 +- thrust/system/hip/detail/general/various.h | 4 +- thrust/system/hip/detail/util.h | 3 +- thrust/system/omp/detail/for_each.inl | 2 +- thrust/system/omp/detail/reduce_intervals.inl | 2 +- thrust/system/tbb/detail/copy_if.inl | 4 +- thrust/system/tbb/detail/reduce.inl | 4 +- thrust/system/tbb/detail/reduce_by_key.inl | 45 +- thrust/system/tbb/detail/reduce_intervals.h | 3 +- thrust/system/tbb/detail/scan.inl | 13 +- thrust/tuple.h | 182 ++-- thrust/type_traits/is_trivially_relocatable.h | 42 + thrust/type_traits/logical_metafunctions.h | 289 +----- thrust/type_traits/void_t.h | 14 +- thrust/unique.h | 16 +- thrust/version.h | 2 +- 184 files changed, 2871 insertions(+), 4216 deletions(-) create mode 100644 testing/tabulate_output_iterator.cu rename thrust/detail/allocator/{default_construct_range.h => value_initialize_range.h} (84%) rename thrust/detail/allocator/{default_construct_range.inl => value_initialize_range.inl} (91%) delete mode 100644 thrust/detail/functional/actor.inl delete mode 100644 thrust/detail/functional/argument.h delete mode 100644 thrust/detail/functional/composite.h delete mode 100644 thrust/detail/functional/operators/arithmetic_operators.h delete mode 100644 thrust/detail/functional/operators/assignment_operator.h delete mode 100644 thrust/detail/functional/operators/bitwise_operators.h delete mode 100644 thrust/detail/functional/operators/compound_assignment_operators.h delete mode 100644 thrust/detail/functional/operators/logical_operators.h delete mode 100644 thrust/detail/functional/operators/operator_adaptors.h delete mode 100644 thrust/detail/functional/operators/relational_operators.h delete mode 100644 thrust/detail/functional/placeholder.h delete mode 100644 thrust/detail/functional/value.h rename thrust/detail/type_traits/{function_traits.h => is_commutative.h} (59%) create mode 100644 thrust/iterator/detail/tabulate_output_iterator.inl create mode 100644 thrust/iterator/tabulate_output_iterator.h diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8f42784e6..dcb62eb28 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -436,7 +436,7 @@ build:cuda-and-omp: tags: - build variables: - CCCL_GIT_BRANCH: v2.6.0 + CCCL_GIT_BRANCH: v2.7.0 CCCL_DIR: ${CI_PROJECT_DIR}/cccl needs: [] script: @@ -444,6 +444,7 @@ build:cuda-and-omp: # Replace CCCL Thrust headers with rocThrust headers - rm -R $CCCL_DIR/thrust/thrust - cp -r $CI_PROJECT_DIR/thrust $CCCL_DIR/thrust + - cp $CI_PROJECT_DIR/testing/type_traits.cu $CCCL_DIR/thrust/testing # Build tests and examples from CCCL Thrust - cmake -G Ninja diff --git a/CHANGELOG.md b/CHANGELOG.md index 9891c75c0..f339eb9d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ Documentation for rocThrust available at ### Changed * Updated the required version of Google Benchmark from 1.8.0 to 1.9.0. +* Drop `c++14` support for rocthrust. +* Renamed `cpp14_required.h` to `cpp_version_check.h` ### Removed diff --git a/CMakeLists.txt b/CMakeLists.txt index 79b8dafa8..9399b7307 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,10 @@ option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg wit check_language(HIP) cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) +# Allow the user to optionally select offset type dispatch to fixed 32 or 64 bit types +set(THRUST_DISPATCH_TYPE "Dynamic" CACHE STRING "Select Thrust offset type dispatch." FORCE) +set_property(CACHE THRUST_DISPATCH_TYPE PROPERTY STRINGS "Dynamic" "Force32bit" "Force64bit") + #Adding CMAKE_PREFIX_PATH list( APPEND CMAKE_PREFIX_PATH /opt/rocm/llvm /opt/rocm ${ROCM_PATH} ) diff --git a/examples/include/host_device.h b/examples/include/host_device.h index f7a4a9547..6b6e22147 100644 --- a/examples/include/host_device.h +++ b/examples/include/host_device.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2009 NVIDIA Corporation - * Modifications Copyright© 2024 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2024-2025 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once -#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC +#ifndef _CCCL_CUDA_COMPILER # ifndef __host__ # define __host__ diff --git a/test/test_constant_iterator.cpp b/test/test_constant_iterator.cpp index 472e31d62..09186ccfd 100644 --- a/test/test_constant_iterator.cpp +++ b/test/test_constant_iterator.cpp @@ -147,14 +147,14 @@ TYPED_TEST(ConstantIteratorTests, ConstantIteratorTransform) ConstIter last1 = first1 + result.size(); ConstIter first2 = make_constant_iterator(3); - transform(first1, last1, result.begin(), negate()); + thrust::transform(first1, last1, result.begin(), thrust::negate()); ASSERT_EQ(-7, result[0]); ASSERT_EQ(-7, result[1]); ASSERT_EQ(-7, result[2]); ASSERT_EQ(-7, result[3]); - transform(first1, last1, first2, result.begin(), plus()); + thrust::transform(first1, last1, first2, result.begin(), thrust::plus()); ASSERT_EQ(10, result[0]); ASSERT_EQ(10, result[1]); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index d3336fb2b..968761962 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -270,6 +270,7 @@ add_thrust_test("stable_sort_by_key_large_values") add_thrust_test("stable_sort_large") add_thrust_test("swap_ranges") add_thrust_test("tabulate") +add_thrust_test("tabulate_output_iterator") add_thrust_test("transform") add_thrust_test("transform_iterator") add_thrust_test("transform_input_output_iterator") diff --git a/testing/allocator.cu b/testing/allocator.cu index 6a18415e6..ba489d747 100644 --- a/testing/allocator.cu +++ b/testing/allocator.cu @@ -103,7 +103,7 @@ struct my_allocator_with_custom_destroy THRUST_HOST ~my_allocator_with_custom_destroy() {} - THRUST_HOST_DEVICE void destroy(T*) + THRUST_HOST_DEVICE void destroy(T*) noexcept { NV_IF_TARGET(NV_IS_HOST, (g_state = true;)); } @@ -113,7 +113,7 @@ struct my_allocator_with_custom_destroy return use_me_to_alloc.allocate(n); } - void deallocate(value_type* ptr, std::ptrdiff_t n) + void deallocate(value_type* ptr, std::ptrdiff_t n) noexcept { use_me_to_alloc.deallocate(ptr, n); } @@ -175,7 +175,7 @@ struct my_minimal_allocator return use_me_to_alloc.allocate(n); } - void deallocate(value_type* ptr, std::ptrdiff_t n) + void deallocate(value_type* ptr, std::ptrdiff_t n) noexcept { use_me_to_alloc.deallocate(ptr, n); } diff --git a/testing/async_sort.cu b/testing/async_sort.cu index d796be5cc..8e5399ad2 100644 --- a/testing/async_sort.cu +++ b/testing/async_sort.cu @@ -18,7 +18,7 @@ #include // Disabled on MSVC && NVCC < 11.1 for GH issue #1098. -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && defined(__CUDACC__) +#if defined(_CCCL_COMPILER_MSVC) && defined(__CUDACC__) # if (__CUDACC_VER_MAJOR__ < 11) || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 1) # define THRUST_BUG_1098_ACTIVE # endif // NVCC version check @@ -102,7 +102,7 @@ struct test_async_sort template struct tester { - THRUST_HOST void operator()(std::size_t n) + THRUST_HOST_DEVICE void operator()(std::size_t n) { thrust::host_vector h0_data(unittest::random_integers(n)); thrust::device_vector d0_data(h0_data); diff --git a/testing/cuda/scan.cu b/testing/cuda/scan.cu index 4de52d45d..b25a42e9f 100644 --- a/testing/cuda/scan.cu +++ b/testing/cuda/scan.cu @@ -29,6 +29,13 @@ __global__ void inclusive_scan_kernel(ExecutionPolicy exec, Iterator1 first, Ite thrust::inclusive_scan(exec, first, last, result); } +template +__global__ void +inclusive_scan_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 result, T init, Pred pred) +{ + thrust::inclusive_scan(exec, first, last, result, init, pred); +} + template __global__ void exclusive_scan_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 result) { @@ -60,6 +67,16 @@ void TestScanDevice(ExecutionPolicy exec, const size_t n) ASSERT_EQUAL(d_output, h_output); + thrust::inclusive_scan(h_input.begin(), h_input.end(), h_output.begin(), (T) 11, thrust::plus{}); + + inclusive_scan_kernel<<<1, 1>>>(exec, d_input.begin(), d_input.end(), d_output.begin(), (T) 11, thrust::plus{}); + { + cudaError_t const err = cudaDeviceSynchronize(); + ASSERT_EQUAL(cudaSuccess, err); + } + + ASSERT_EQUAL(d_output, h_output); + thrust::exclusive_scan(h_input.begin(), h_input.end(), h_output.begin()); exclusive_scan_kernel<<<1, 1>>>(exec, d_input.begin(), d_input.end(), d_output.begin()); @@ -203,6 +220,20 @@ void TestScanCudaStreams() ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); + // inclusive scan with init and op + iter = + thrust::inclusive_scan(thrust::cuda::par.on(s), input.begin(), input.end(), output.begin(), 3, thrust::plus()); + cudaStreamSynchronize(s); + + result[0] = 4; + result[1] = 7; + result[2] = 5; + result[3] = 9; + result[4] = 4; + ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); + ASSERT_EQUAL(input, input_copy); + ASSERT_EQUAL(output, result); + // exclusive scan with init and op iter = thrust::exclusive_scan(thrust::cuda::par.on(s), input.begin(), input.end(), output.begin(), 3, thrust::plus()); diff --git a/testing/cuda/sort.cu b/testing/cuda/sort.cu index 5672739d2..c98886d3c 100644 --- a/testing/cuda/sort.cu +++ b/testing/cuda/sort.cu @@ -15,10 +15,35 @@ * limitations under the License. */ +#include +#include #include #include +#include +#include +#include #include +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# include +namespace _std = ::cuda::std; +#elif defined(__has_include) +# if __has_include() +# include +namespace _std = ::cuda::std; +# else +# include +namespace _std = std; +# endif +#else +# include +namespace _std = std; +#endif + +#include +#include +#include + #include template @@ -155,3 +180,151 @@ void TestComparisonSortCudaStreams() cudaStreamDestroy(s); } DECLARE_UNITTEST(TestComparisonSortCudaStreams); + +template +struct TestRadixSortDispatch +{ + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + static_assert(thrust::cuda_cub::__smart_sort::can_use_primitive_sort>::value, ""); + + void operator()() const {} +}; +// TODO(bgruber): use a single test case with a concatenated key list and a cartesion product with the comparators +SimpleUnitTest TestRadixSortDispatchIntegralInstance; +SimpleUnitTest TestRadixSortDispatchFPInstance; +/** + * Copy of CUB testing utility + */ +template +struct index_to_key_value_op +{ + static constexpr std::size_t max_key_value = + static_cast(_std::numeric_limits::max()); + static constexpr std::size_t lowest_key_value = + static_cast(_std::numeric_limits::lowest()); + static constexpr std::size_t num_distinct_key_values = (max_key_value - lowest_key_value + std::size_t{1ULL}); + + __device__ __host__ UnsignedIntegralKeyT operator()(std::size_t index) + { + return static_cast(index % num_distinct_key_values); + } +}; + +/** + * Copy of CUB testing utility + */ +template +class index_to_expected_key_op +{ +private: + static constexpr std::size_t max_key_value = + static_cast(_std::numeric_limits::max()); + static constexpr std::size_t lowest_key_value = + static_cast(_std::numeric_limits::lowest()); + static constexpr std::size_t num_distinct_key_values = (max_key_value - lowest_key_value + std::size_t{1ULL}); + + // item_count / num_distinct_key_values + std::size_t expected_count_per_item; + // num remainder items: item_count%num_distinct_key_values + std::size_t num_remainder_items; + // remainder item_count: expected_count_per_item+1 + std::size_t remainder_item_count; + +public: + index_to_expected_key_op(std::size_t num_total_items) + : expected_count_per_item(num_total_items / num_distinct_key_values) + , num_remainder_items(num_total_items % num_distinct_key_values) + , remainder_item_count(expected_count_per_item + std::size_t{1ULL}) + {} + + __device__ __host__ UnsignedIntegralKeyT operator()(std::size_t index) + { + // The first (num_remainder_items * remainder_item_count) are items that appear once more often than the items that + // follow remainder_items_offset + std::size_t remainder_items_offset = num_remainder_items * remainder_item_count; + + UnsignedIntegralKeyT target_item_index = + (index <= remainder_items_offset) + ? + // This is one of the remainder items + static_cast(index / remainder_item_count) + : + // This is an item that appears exactly expected_count_per_item times + static_cast( + num_remainder_items + ((index - remainder_items_offset) / expected_count_per_item)); + return target_item_index; + } +}; + +void TestSortWithMagnitude(int magnitude) +{ + try + { + const std::size_t num_items = 1ull << magnitude; + thrust::device_vector vec(num_items); + auto counting_it = thrust::make_counting_iterator(std::size_t{0}); + auto key_value_it = thrust::make_transform_iterator(counting_it, index_to_key_value_op{}); + auto rev_sorted_it = thrust::make_reverse_iterator(key_value_it + num_items); + thrust::copy(rev_sorted_it, rev_sorted_it + num_items, vec.begin()); + thrust::sort(vec.begin(), vec.end()); + auto expected_result_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(std::size_t{}), index_to_expected_key_op(num_items)); + const bool ok = thrust::equal(expected_result_it, expected_result_it + num_items, vec.cbegin()); + ASSERT_EQUAL(ok, true); + } + catch (std::bad_alloc&) + {} +} + +void TestSortWithLargeNumberOfItems() +{ + TestSortWithMagnitude(39); + TestSortWithMagnitude(32); + TestSortWithMagnitude(33); +} +DECLARE_UNITTEST(TestSortWithLargeNumberOfItems); + +template +struct TestSortAscendingKey +{ + void operator()() const + { + constexpr int n = 10000; + + thrust::host_vector h_data = unittest::random_integers(n); + thrust::device_vector d_data = h_data; + + std::sort(h_data.begin(), h_data.end(), thrust::less{}); + thrust::sort(d_data.begin(), d_data.end(), thrust::less{}); + + ASSERT_EQUAL_QUIET(h_data, d_data); + } +}; + +SimpleUnitTest +#ifndef _LIBCUDACXX_HAS_NO_INT128 + , + unittest::type_list<__int128_t, __uint128_t> +#endif +// CTK 12.2 offers __host__ __device__ operators for __half and __nv_bfloat16, so we can use std::sort +#if _CCCL_CUDACC_VER >= 1202000 +# if defined(_CCCL_HAS_NVFP16) || !defined(__CUDA_NO_HALF_OPERATORS__) && !defined(__CUDA_NO_HALF_CONVERSIONS__) + , + unittest::type_list<__half> +# endif +# if defined(_CCCL_HAS_NVBF16) \ + || !defined(__CUDA_NO_BFLOAT16_OPERATORS__) && !defined(__CUDA_NO_BFLOAT16_CONVERSIONS__) + , + unittest::type_list<__nv_bfloat16> +# endif +#endif // _CCCL_CUDACC_VER >= 1202000 + >> + TestSortAscendingKeyMoreTypes; diff --git a/testing/functional_placeholders_miscellaneous.cu b/testing/functional_placeholders_miscellaneous.cu index 271ecb294..84148a3f8 100644 --- a/testing/functional_placeholders_miscellaneous.cu +++ b/testing/functional_placeholders_miscellaneous.cu @@ -18,6 +18,15 @@ #include #include +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# include +#elif defined(__has_include) +# if __has_include() +# include +# endif // __has_include() +#endif // THRUST_DEVICE_SYSTEM +#include + #include template @@ -96,3 +105,35 @@ VectorUnitTest TestFunctionalPlaceholdersTransformIteratorInstanceHost; + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +void TestFunctionalPlaceholdersArgumentValueCategories() +{ + using namespace thrust::placeholders; + auto expr = _1 * _1 + _2 * _2; + int a = 2; + int b = 3; + ASSERT_EQUAL(expr(2, 3), 13); // pass pr-value + ASSERT_EQUAL(expr(a, b), 13); // pass l-value + ASSERT_EQUAL(expr(::cuda::std::move(a), ::cuda::std::move(b)), 13); // pass x-value +} +DECLARE_UNITTEST(TestFunctionalPlaceholdersArgumentValueCategories); + +void TestFunctionalPlaceholdersSemiRegular() +{ + using namespace thrust::placeholders; + using Expr = decltype(_1 * _1 + _2 * _2); + Expr expr; // default-constructible + ASSERT_EQUAL(expr(2, 3), 13); + Expr expr2 = expr; // copy-constructible + ASSERT_EQUAL(expr2(2, 3), 13); + Expr expr3; + expr3 = expr; // copy-assignable + ASSERT_EQUAL(expr3(2, 3), 13); + +# if _CCCL_STD_VER >= 2014 + static_assert(::cuda::std::semiregular, ""); +# endif // _CCCL_STD_VER >= 2014 +} +DECLARE_UNITTEST(TestFunctionalPlaceholdersSemiRegular); +#endif diff --git a/testing/inner_product.cu b/testing/inner_product.cu index 20f356cf0..8eeefd941 100644 --- a/testing/inner_product.cu +++ b/testing/inner_product.cu @@ -126,7 +126,7 @@ struct only_set_when_both_expected long long expected; bool* flag; - THRUST_DEVICE long long operator()(long long x, long long y) + THRUST_HOST_DEVICE long long operator()(long long x, long long y) { if (x == expected && y == expected) { diff --git a/testing/merge_by_key.cu b/testing/merge_by_key.cu index 89ebe5117..b98ceb39e 100644 --- a/testing/merge_by_key.cu +++ b/testing/merge_by_key.cu @@ -16,8 +16,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -269,3 +271,67 @@ void TestMergeByKeyDescending(size_t n) TestMergeByKey>(n); } DECLARE_VARIABLE_UNITTEST(TestMergeByKeyDescending); + +struct def_level_fn +{ + THRUST_DEVICE std::uint32_t operator()(int i) const + { + return static_cast(i + 10); + } +}; + +struct offset_transform +{ + THRUST_DEVICE int operator()(int i) const + { + return i + 1; + } +}; + +// Tests the use of thrust::merge_by_key similar to cuDF in +// https://github.com/rapidsai/cudf/blob/branch-24.08/cpp/src/lists/dremel.cu#L413 +void TestMergeByKeyFromCuDFDremel() +{ + // TODO(bgruber): I have no idea what this code is actually computing, but I tried to replicate the types/iterators + constexpr std::ptrdiff_t empties_size = 123; + constexpr int max_vals_size = 225; + constexpr int level = 4; + constexpr int curr_rep_values_size = 0; + + thrust::device_vector empties(empties_size, 42); + thrust::device_vector empties_idx(empties_size, 13); + + thrust::device_vector temp_rep_vals(max_vals_size); + thrust::device_vector temp_def_vals(max_vals_size); + thrust::device_vector rep_level(max_vals_size); + thrust::device_vector def_level(max_vals_size); + + auto offset_transformer = offset_transform{}; + auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = thrust::make_transform_iterator(empties_idx.begin(), def_level_fn{}); + auto input_parent_zip_it = thrust::make_zip_iterator(input_parent_rep_it, input_parent_def_it); + auto input_child_zip_it = thrust::make_zip_iterator(temp_rep_vals.begin(), temp_def_vals.begin()); + auto output_zip_it = thrust::make_zip_iterator(rep_level.begin(), def_level.begin()); + + thrust::merge_by_key( + transformed_empties, + transformed_empties + empties_size, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(curr_rep_values_size), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + thrust::device_vector reference_rep_level(max_vals_size); + thrust::fill(reference_rep_level.begin(), reference_rep_level.begin() + empties_size, level); + + thrust::device_vector reference_def_level(max_vals_size); + thrust::fill(reference_def_level.begin(), reference_def_level.begin() + empties_size, 13 + 10); + + ASSERT_EQUAL(reference_rep_level, rep_level); + ASSERT_EQUAL(reference_def_level, def_level); +} +DECLARE_UNITTEST(TestMergeByKeyFromCuDFDremel); diff --git a/testing/omp/reduce_intervals.cu b/testing/omp/reduce_intervals.cu index 409bb3251..9ee0b5637 100644 --- a/testing/omp/reduce_intervals.cu +++ b/testing/omp/reduce_intervals.cu @@ -29,7 +29,7 @@ void reduce_intervals(InputIterator input, OutputIterator output, BinaryFunction using index_type = typename Decomposition::index_type; // wrap binary_op - thrust::detail::wrapped_function wrapped_binary_op(binary_op); + thrust::detail::wrapped_function wrapped_binary_op{binary_op}; for (index_type i = 0; i < decomp.size(); ++i, ++output) { diff --git a/testing/scan.cu b/testing/scan.cu index ae5382fed..bbff5f635 100644 --- a/testing/scan.cu +++ b/testing/scan.cu @@ -25,6 +25,9 @@ #include #include +#include +#include + #include template @@ -57,54 +60,33 @@ void TestScanSimple(void) Vector result(5); Vector output(5); - input[0] = 1; - input[1] = 3; - input[2] = -2; - input[3] = 4; - input[4] = -5; - + input = {1, 3, -2, 4, -5}; Vector input_copy(input); // inclusive scan - iter = thrust::inclusive_scan(input.begin(), input.end(), output.begin()); - result[0] = 1; - result[1] = 4; - result[2] = 2; - result[3] = 6; - result[4] = 1; + iter = thrust::inclusive_scan(input.begin(), input.end(), output.begin()); + result = {1, 4, 2, 6, 1}; ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); // exclusive scan - iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(0)); - result[0] = 0; - result[1] = 1; - result[2] = 4; - result[3] = 2; - result[4] = 6; + iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(0)); + result = {0, 1, 4, 2, 6}; ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); // exclusive scan with init - iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(3)); - result[0] = 3; - result[1] = 4; - result[2] = 7; - result[3] = 5; - result[4] = 9; + iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(3)); + result = {3, 4, 7, 5, 9}; ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); // inclusive scan with op - iter = thrust::inclusive_scan(input.begin(), input.end(), output.begin(), thrust::plus()); - result[0] = 1; - result[1] = 4; - result[2] = 2; - result[3] = 6; - result[4] = 1; + iter = thrust::inclusive_scan(input.begin(), input.end(), output.begin(), thrust::plus()); + result = {1, 4, 2, 6, 1}; ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); @@ -117,24 +99,16 @@ void TestScanSimple(void) ASSERT_EQUAL(output, result); // exclusive scan with init and op - iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(3), thrust::plus()); - result[0] = 3; - result[1] = 4; - result[2] = 7; - result[3] = 5; - result[4] = 9; + iter = thrust::exclusive_scan(input.begin(), input.end(), output.begin(), T(3), thrust::plus()); + result = {3, 4, 7, 5, 9}; ASSERT_EQUAL(std::size_t(iter - output.begin()), input.size()); ASSERT_EQUAL(input, input_copy); ASSERT_EQUAL(output, result); // inplace inclusive scan - input = input_copy; - iter = thrust::inclusive_scan(input.begin(), input.end(), input.begin()); - result[0] = 1; - result[1] = 4; - result[2] = 2; - result[3] = 6; - result[4] = 1; + input = input_copy; + iter = thrust::inclusive_scan(input.begin(), input.end(), input.begin()); + result = {1, 4, 2, 6, 1}; ASSERT_EQUAL(std::size_t(iter - input.begin()), input.size()); ASSERT_EQUAL(input, result); @@ -146,24 +120,16 @@ void TestScanSimple(void) ASSERT_EQUAL(input, result); // inplace exclusive scan with init - input = input_copy; - iter = thrust::exclusive_scan(input.begin(), input.end(), input.begin(), T(3)); - result[0] = 3; - result[1] = 4; - result[2] = 7; - result[3] = 5; - result[4] = 9; + input = input_copy; + iter = thrust::exclusive_scan(input.begin(), input.end(), input.begin(), T(3)); + result = {3, 4, 7, 5, 9}; ASSERT_EQUAL(std::size_t(iter - input.begin()), input.size()); ASSERT_EQUAL(input, result); // inplace exclusive scan with implicit init=0 - input = input_copy; - iter = thrust::exclusive_scan(input.begin(), input.end(), input.begin()); - result[0] = 0; - result[1] = 1; - result[2] = 4; - result[3] = 2; - result[4] = 6; + input = input_copy; + iter = thrust::exclusive_scan(input.begin(), input.end(), input.begin()); + result = {0, 1, 4, 2, 6}; ASSERT_EQUAL(std::size_t(iter - input.begin()), input.size()); ASSERT_EQUAL(input, result); } @@ -282,18 +248,8 @@ template void TestScanMixedTypes(void) { // make sure we get types for default args and operators correct - IntVector int_input(4); - int_input[0] = 1; - int_input[1] = 2; - int_input[2] = 3; - int_input[3] = 4; - - FloatVector float_input(4); - float_input[0] = 1.5; - float_input[1] = 2.5; - float_input[2] = 3.5; - float_input[3] = 4.5; - + IntVector int_input{1, 2, 3, 4}; + FloatVector float_input{1.5, 2.5, 3.5, 4.5}; IntVector int_output(4); FloatVector float_output(4); @@ -575,32 +531,11 @@ void TestInclusiveScanWithIndirection(void) // add numbers modulo 3 with external lookup table using T = typename Vector::value_type; - Vector data(7); - data[0] = 0; - data[1] = 1; - data[2] = 2; - data[3] = 1; - data[4] = 2; - data[5] = 0; - data[6] = 1; - - Vector table(6); - table[0] = 0; - table[1] = 1; - table[2] = 2; - table[3] = 0; - table[4] = 1; - table[5] = 2; - + Vector data{0, 1, 2, 1, 2, 0, 1}; + Vector table{0, 1, 2, 0, 1, 2}; thrust::inclusive_scan(data.begin(), data.end(), data.begin(), plus_mod3(thrust::raw_pointer_cast(&table[0]))); - ASSERT_EQUAL(data[0], T(0)); - ASSERT_EQUAL(data[1], T(1)); - ASSERT_EQUAL(data[2], T(0)); - ASSERT_EQUAL(data[3], T(1)); - ASSERT_EQUAL(data[4], T(0)); - ASSERT_EQUAL(data[5], T(0)); - ASSERT_EQUAL(data[6], T(1)); + ASSERT_EQUAL(data, (Vector{0, 1, 0, 1, 0, 0, 1})); } DECLARE_INTEGRAL_VECTOR_UNITTEST(TestInclusiveScanWithIndirection); @@ -624,34 +559,12 @@ void TestInclusiveScanWithConstAccumulator(void) { // add numbers modulo 3 with external lookup table using T = typename Vector::value_type; - - Vector data(7); - data[0] = 0; - data[1] = 1; - data[2] = 2; - data[3] = 1; - data[4] = 2; - data[5] = 0; - data[6] = 1; - - Vector table(6); - table[0] = 0; - table[1] = 1; - table[2] = 2; - table[3] = 0; - table[4] = 1; - table[5] = 2; - + Vector data{0, 1, 2, 1, 2, 0, 1}; + Vector table{0, 1, 2, 0, 1, 2}; thrust::inclusive_scan( data.begin(), data.end(), data.begin(), const_ref_plus_mod3(thrust::raw_pointer_cast(&table[0]))); - ASSERT_EQUAL(data[0], T(0)); - ASSERT_EQUAL(data[1], T(1)); - ASSERT_EQUAL(data[2], T(0)); - ASSERT_EQUAL(data[3], T(1)); - ASSERT_EQUAL(data[4], T(0)); - ASSERT_EQUAL(data[5], T(0)); - ASSERT_EQUAL(data[6], T(1)); + ASSERT_EQUAL(data, (Vector{0, 1, 0, 1, 0, 0, 1})); } DECLARE_INTEGRAL_VECTOR_UNITTEST(TestInclusiveScanWithConstAccumulator); @@ -779,3 +692,95 @@ void TestInclusiveScanWithUserDefinedType() ASSERT_EQUAL(static_cast(vec.back()).i, 5); } DECLARE_UNITTEST(TestInclusiveScanWithUserDefinedType); + +// Represents a permutation as a tuple of integers, see also: https://en.wikipedia.org/wiki/Permutation +// We need a distinct type (instead of an alias) for operator<< to be found via ADL +struct permutation_t : std::array +{ + permutation_t() = default; + + constexpr THRUST_HOST_DEVICE permutation_t(int a, int b, int c, int d, int e) + : std::array{a, b, c, d, e} + {} + + friend std::ostream& operator<<(std::ostream& os, const permutation_t& p) + { + os << '{'; + for (std::size_t i = 0; i < p.size(); i++) + { + if (i > 0) + { + os << ", "; + } + os << p[i]; + } + return os << '}'; + } +}; + +// Composes two permutations. This operation is associative, but not commutative. +struct composition_op_t +{ + THRUST_HOST_DEVICE permutation_t operator()(permutation_t lhs, permutation_t rhs) const + { + permutation_t result; + // Get raw pointers to the underlying data to avoid operator[] which + // results in debug-assert calls (and __glibcxx_assert_fail) on device. + auto lhs_ptr = lhs.data(); + auto rhs_ptr = rhs.data(); + auto result_ptr = result.data(); + for (std::size_t i = 0; i < lhs.size(); i++) + { + result_ptr[i] = rhs_ptr[lhs_ptr[i]]; + } + return result; + } +}; + +void TestInclusiveScanWithNonCommutativeOp() +{ + const thrust::device_vector input = { + {3, 2, 0, 1, 4}, + {2, 4, 0, 1, 3}, + {3, 2, 1, 4, 0}, + {4, 3, 1, 0, 2}, + {0, 3, 2, 4, 1}, + {3, 2, 1, 0, 4}, + {3, 4, 1, 2, 0}, + {4, 2, 1, 0, 3}, + {4, 0, 1, 3, 2}, + {0, 2, 3, 1, 4}}; + thrust::device_vector output(10); + constexpr auto identity = permutation_t{0, 1, 2, 3, 4}; + + thrust::inclusive_scan(input.begin(), input.end(), output.begin(), composition_op_t{}); + ASSERT_EQUAL( + output, + (thrust::device_vector{ + {3, 2, 0, 1, 4}, + {1, 0, 2, 4, 3}, + {2, 3, 1, 0, 4}, + {1, 0, 3, 4, 2}, + {3, 0, 4, 1, 2}, + {0, 3, 4, 2, 1}, + {3, 2, 0, 1, 4}, + {0, 1, 4, 2, 3}, + {4, 0, 2, 1, 3}, + {4, 0, 3, 2, 1}})); + + thrust::exclusive_scan(input.begin(), input.end(), output.begin(), identity, composition_op_t{}); + ASSERT_EQUAL( + output, + (thrust::device_vector{ + {0, 1, 2, 3, 4}, + {3, 2, 0, 1, 4}, + {1, 0, 2, 4, 3}, + {2, 3, 1, 0, 4}, + {1, 0, 3, 4, 2}, + {3, 0, 4, 1, 2}, + {0, 3, 4, 2, 1}, + {3, 2, 0, 1, 4}, + {0, 1, 4, 2, 3}, + {4, 0, 2, 1, 3}})); +} +DECLARE_UNITTEST(TestInclusiveScanWithNonCommutativeOp); diff --git a/testing/sort.cu b/testing/sort.cu index 4ca6c7385..8147002e6 100644 --- a/testing/sort.cu +++ b/testing/sort.cu @@ -173,5 +173,20 @@ struct TestRadixSortDispatch void operator()() const {} }; -SimpleUnitTest TestRadixSortDispatchIntegralInstance; -SimpleUnitTest TestRadixSortDispatchFPInstance; +SimpleUnitTest +#endif // _LIBCUDACXX_HAS_NO_INT128 +#ifdef _CCCL_HAS_NVFP16 + , + unittest::type_list<__half> +#endif // _CCCL_HAS_NVFP16 +#ifdef _CCCL_HAS_NVBF16 + , + unittest::type_list<__nv_bfloat16> +#endif // _CCCL_HAS_NVBF16 + >> + TestRadixSortDispatchInstance; diff --git a/testing/tabulate_output_iterator.cu b/testing/tabulate_output_iterator.cu new file mode 100644 index 000000000..12c4693bc --- /dev/null +++ b/testing/tabulate_output_iterator.cu @@ -0,0 +1,167 @@ +/* + * Copyright 2024 NVIDIA Corporation + * Modifications Copyright© 2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +template +struct host_write_op +{ + OutItT out; + + template + THRUST_HOST void operator()(IndexT index, T val) + { + out[index] = val; + } +}; + +template +struct host_write_first_op +{ + OutItT out; + + template + THRUST_HOST void operator()(IndexT index, T val) + { + // val is a thrust::tuple(value, input_index). Only write out the value part. + out[index] = thrust::get<0>(val); + } +}; + +template +struct device_write_first_op +{ + OutItT out; + + template + THRUST_DEVICE void operator()(IndexT index, T val) + { + // val is a thrust::tuple(value, input_index). Only write out the value part. + out[index] = thrust::get<0>(val); + } +}; + +struct select_op +{ + std::size_t select_every_nth; + + template + THRUST_HOST_DEVICE bool operator()(thrust::tuple key_index_pair) + { + // Select every n-th item + return (thrust::get<1>(key_index_pair) % select_every_nth == 0); + } +}; + +struct index_to_gather_index_op +{ + std::size_t gather_stride; + + template + THRUST_HOST_DEVICE IndexT operator()(IndexT index) + { + // Gather the i-th output item from input[i*3] + return index * static_cast(gather_stride); + } +}; + +template +void TestTabulateOutputIterator() +{ + using T = typename Vector::value_type; + using it_t = typename Vector::iterator; + using space = typename thrust::iterator_system::type; + + static constexpr std::size_t num_items = 240; + Vector input(num_items); + Vector output(num_items, T{42}); + + // Use operator type that supports the targeted system + using op_t = typename std::conditional<(std::is_same::value), + host_write_first_op, + device_write_first_op>::type; + + // Construct tabulate_output_iterator + op_t op{output.begin()}; + auto tabulate_out_it = thrust::make_tabulate_output_iterator(op); + + // Prepare input + thrust::sequence(input.begin(), input.end(), 1); + auto iota_it = thrust::make_counting_iterator(0); + auto zipped_in = thrust::make_zip_iterator(input.begin(), iota_it); + + // Run copy_if using tabulate_output_iterator as the output iterator + static constexpr std::size_t select_every_nth = 3; + auto selected_it_end = + thrust::copy_if(zipped_in, zipped_in + num_items, tabulate_out_it, select_op{select_every_nth}); + const auto num_selected = static_cast(thrust::distance(tabulate_out_it, selected_it_end)); + + // Prepare expected data + Vector expected_output(num_items, T{42}); + const std::size_t expected_num_selected = (num_items + select_every_nth - 1) / select_every_nth; + auto gather_index_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), index_to_gather_index_op{select_every_nth}); + thrust::gather(gather_index_it, gather_index_it + expected_num_selected, input.cbegin(), expected_output.begin()); + + ASSERT_EQUAL(expected_num_selected, num_selected); + ASSERT_EQUAL(output, expected_output); +} +DECLARE_VECTOR_UNITTEST(TestTabulateOutputIterator); + +void TestTabulateOutputIterator() +{ + using vector_t = thrust::host_vector; + using vec_it_t = typename vector_t::iterator; + using op_t = host_write_op; + + vector_t out(4, 42); + thrust::tabulate_output_iterator tabulate_out_it{op_t{out.begin()}}; + + tabulate_out_it[1] = 2; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 2); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 42); + + tabulate_out_it[3] = 0; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 2); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 0); + + tabulate_out_it[1] = 4; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 4); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 0); +} + +DECLARE_UNITTEST(TestTabulateOutputIterator); diff --git a/testing/transform_iterator.cu b/testing/transform_iterator.cu index eb9f48550..65503b013 100644 --- a/testing/transform_iterator.cu +++ b/testing/transform_iterator.cu @@ -23,6 +23,9 @@ #include #include +#include +#include +#include #include @@ -125,3 +128,102 @@ void TestTransformIteratorNonCopyable() } DECLARE_UNITTEST(TestTransformIteratorNonCopyable); + +struct flip_value +{ + THRUST_HOST_DEVICE bool operator()(bool b) const + { + return !b; + } +}; + +struct pass_ref +{ + THRUST_HOST_DEVICE const bool& operator()(const bool& b) const + { + return b; + } +}; + +// TODO(bgruber): replace by libc++ with C++14 +struct forward +{ + template + constexpr _Tp&& operator()(_Tp&& __t) const noexcept + { + return std::forward<_Tp>(__t); + } +}; + +void TestTransformIteratorReferenceAndValueType() +{ + { + thrust::host_vector v; + + auto it = v.begin(); + static_assert(std::is_same::value, ""); // ordinary reference + static_assert(std::is_same::value, ""); + + auto it_tr_val = thrust::make_transform_iterator(it, flip_value{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_val; + + auto it_tr_ref = thrust::make_transform_iterator(it, pass_ref{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_ref; + + auto it_tr_fwd = thrust::make_transform_iterator(it, forward{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_fwd; + } + + { + thrust::device_vector v; + + auto it = v.begin(); + static_assert(std::is_same>::value, ""); // proxy reference + static_assert(std::is_same::value, ""); + + auto it_tr_val = thrust::make_transform_iterator(it, flip_value{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_val; + + auto it_tr_ref = thrust::make_transform_iterator(it, pass_ref{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_ref; + + auto it_tr_fwd = thrust::make_transform_iterator(it, forward{}); + static_assert(std::is_same::value, ""); // wrapped reference is decayed + static_assert(std::is_same::value, ""); + (void) it_tr_fwd; + } + + { + std::vector v; + + auto it = v.begin(); + static_assert(std::is_same::reference>::value, ""); // proxy reference + static_assert(std::is_same::value, ""); + + auto it_tr_val = thrust::make_transform_iterator(it, flip_value{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_val; + + auto it_tr_ref = thrust::make_transform_iterator(it, pass_ref{}); + static_assert(std::is_same::value, ""); + static_assert(std::is_same::value, ""); + (void) it_tr_ref; + + auto it_tr_fwd = thrust::make_transform_iterator(it, forward{}); + static_assert(std::is_same::value, ""); // proxy reference is decayed + static_assert(std::is_same::value, ""); + (void) it_tr_fwd; + } +} +DECLARE_UNITTEST(TestTransformIteratorReferenceAndValueType); diff --git a/testing/type_traits.cu b/testing/type_traits.cu index 30b1f589a..d6abf41b1 100644 --- a/testing/type_traits.cu +++ b/testing/type_traits.cu @@ -22,8 +22,33 @@ #include #include #include +#include +#include #include +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# if defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION >= 70000 +// This header pulls in an unsuppressable warning on GCC 6 +# include +# endif // defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION >= 70000 +# include +# include +# include +#elif defined(__has_include) +# if __has_include() +# include +# endif // __has_include() +# if __has_include() +# include +# endif // __has_include() +# if __has_include() +# include +# endif // __has_include() +# if __has_include() +# include +# endif // __has_include() +#endif // THRUST_DEVICE_SYSTEM + #include struct non_pod @@ -218,3 +243,56 @@ void TestIsCommutative(void) } } DECLARE_UNITTEST(TestIsCommutative); + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +struct NonTriviallyCopyable +{ + NonTriviallyCopyable(const NonTriviallyCopyable&) {} +}; +THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE(NonTriviallyCopyable); + +static_assert(!::cuda::std::is_trivially_copyable::value, ""); +static_assert(thrust::is_trivially_relocatable::value, ""); + +void TestTriviallyRelocatable() +{ + static_assert(thrust::is_trivially_relocatable::value, ""); +# if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + static_assert(thrust::is_trivially_relocatable<__half>::value, ""); + static_assert(thrust::is_trivially_relocatable::value, ""); + static_assert(thrust::is_trivially_relocatable::value, ""); + static_assert(thrust::is_trivially_relocatable::value, ""); + static_assert(thrust::is_trivially_relocatable::value, ""); +# ifndef _LIBCUDACXX_HAS_NO_INT128 + static_assert(thrust::is_trivially_relocatable<__int128>::value, ""); +# endif // _LIBCUDACXX_HAS_NO_INT128 +# endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# if defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION >= 70000 + static_assert(thrust::is_trivially_relocatable>::value, ""); + static_assert(thrust::is_trivially_relocatable<::cuda::std::complex>::value, ""); + static_assert(thrust::is_trivially_relocatable>>::value, ""); + static_assert(thrust::is_trivially_relocatable<::cuda::std::pair>>::value, ""); + static_assert(thrust::is_trivially_relocatable, char>>::value, ""); + static_assert(thrust::is_trivially_relocatable<::cuda::std::tuple, char>>::value, + ""); +# endif // defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION >= 70000 + static_assert(thrust::is_trivially_relocatable< + ::cuda::std::tuple>>, + thrust::tuple<::cuda::std::pair>, int>>>::value, + ""); + + static_assert(!thrust::is_trivially_relocatable>::value, ""); + static_assert(!thrust::is_trivially_relocatable<::cuda::std::pair>::value, ""); + static_assert(!thrust::is_trivially_relocatable>::value, ""); + static_assert(!thrust::is_trivially_relocatable<::cuda::std::tuple>::value, ""); + + // test propagation of relocatability through pair and tuple + static_assert(thrust::is_trivially_relocatable::value, ""); + static_assert(thrust::is_trivially_relocatable>::value, ""); + static_assert(thrust::is_trivially_relocatable<::cuda::std::pair>::value, ""); + static_assert(thrust::is_trivially_relocatable>::value, ""); + static_assert(thrust::is_trivially_relocatable<::cuda::std::tuple>::value, ""); +}; +DECLARE_UNITTEST(TestTriviallyRelocatable); + +#endif diff --git a/testing/unique.cu b/testing/unique.cu index b51ebd93e..3e3a1a710 100644 --- a/testing/unique.cu +++ b/testing/unique.cu @@ -20,6 +20,12 @@ #include #include +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +# include +#else +# include +#endif + #include template @@ -341,3 +347,39 @@ struct TestUniqueCount } }; VariableUnitTest TestUniqueCountInstance; + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +template +using DeviceArray = cuda::std::array; +#else // THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_CUDA +template +struct DeviceArray +{ + T data[N]; + + // Host and device-compatible equality operator + __host__ __device__ bool operator==(const DeviceArray& other) const + { + for (std::size_t i = 0; i < N; ++i) + { + if (data[i] != other.data[i]) + { + return false; + } + } + return true; + } +}; +#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + +template +struct TestUniqueMemoryAccess +{ + void operator()(void) + { + thrust::device_vector> v(10); + thrust::unique(v.begin(), v.end()); + } +}; + +SimpleUnitTest> TestUniqueMemoryAccessInstance; diff --git a/testing/unittest/assertions.h b/testing/unittest/assertions.h index 75d2456b2..5e85b2f16 100644 --- a/testing/unittest/assertions.h +++ b/testing/unittest/assertions.h @@ -24,6 +24,8 @@ #include #include +#include + #include #include @@ -404,6 +406,17 @@ class almost_equal_to> //// // check sequences +inline int promote_char(char c) +{ + return c; +} + +template +T&& promote_char(T&& t) +{ + return std::forward(t); +} + template void assert_equal( ForwardIterator1 first1, @@ -453,16 +466,7 @@ void assert_equal( if (mismatches <= MAX_OUTPUT_LINES) { - THRUST_IF_CONSTEXPR (sizeof(InputType) == 1) - { - f << " [" << i << "] " << *first1 + InputType() << " " << *first2 + InputType() << "\n"; // unprintable - // chars are a - // problem - } - else - { - f << " [" << i << "] " << *first1 << " " << *first2 << "\n"; - } + f << " [" << i << "] " << promote_char(*first1) << " " << promote_char(*first2) << "\n"; } } diff --git a/testing/unittest/meta.h b/testing/unittest/meta.h index 7d9ad3048..71cc4d47a 100644 --- a/testing/unittest/meta.h +++ b/testing/unittest/meta.h @@ -184,4 +184,19 @@ struct transform2, type_list, Template> using type = type_list::type...>; }; +template +struct concat; + +template +struct concat +{ + using type = L; +}; + +template