// Review summary of this patch (three files are touched).
//
// NOTE(review): this copy of the patch appears to have lost its original line
// breaks and most template argument lists (e.g. both `precision_threshold`
// declarations read identically, and `std::vector data(size)` has no element
// type). Restore them from the original commit before trying to apply it.
//
// rocprim/include/rocprim/intrinsics/thread.hpp
//   * In the two flat_block_id() overloads shown (BlockSizeY > 1 with
//     BlockSizeZ == 1, and BlockSizeY > 1 with BlockSizeZ > 1) the first term
//     of the returned sum changes from hipThreadIdx_x to hipBlockIdx_x; the
//     remaining terms are unchanged.
//
// test/rocprim/test_block_scan.cpp
//   * Both test_utils::assert_near() calls replace the literal 0.01 with
//     test_utils::precision_threshold::percentage.
//
// test/rocprim/test_utils.hpp
//   * Adds random_data_generation_segments (32) and
//     random_data_generation_repeat_strides (4).
//   * Adds precision_threshold with percentage = 0.01f and one explicit
//     specialization with percentage = 0.05f (the specialized type is not
//     visible in this copy).
//   * In four get_random_data() hunks: when size / 32 is non-zero, the vector
//     is treated as 32 segments of segment_size elements. Segments whose index
//     is a multiple of 4 are std::fill'ed with a single drawn value; all other
//     segments are std::generate'd element by element. When size / 32 is zero
//     the whole vector is generated as before.
//   * Both get_random_value() hunks now request
//     random_data_generation_segments elements instead of 1 and still return
//     element [0].
//
// NOTE(review): in the segmented path only the first segment_size * 32
// elements are written. The trailing size % 32 elements are never passed to
// std::fill / std::generate, so they keep whatever `std::vector data(size)`
// constructed them with (value-initialised elements for a standard vector).
// Confirm this is intended, or fill the remainder as well.
//
// NOTE(review): segment_size is declared uint32_t although size is size_t,
// and the iterator offsets `segment_size * segment_index` are products of two
// uint32_t values. Confirm callers never pass sizes large enough for the
// narrowing or the 32-bit product to matter, or switch segment_size to size_t.
diff --git a/rocprim/include/rocprim/intrinsics/thread.hpp b/rocprim/include/rocprim/intrinsics/thread.hpp index 15fb48023..353414bba 100644 --- a/rocprim/include/rocprim/intrinsics/thread.hpp +++ b/rocprim/include/rocprim/intrinsics/thread.hpp @@ -151,7 +151,7 @@ ROCPRIM_DEVICE inline auto flat_block_id() -> typename std::enable_if<(BlockSizeY > 1 && BlockSizeZ == 1), unsigned int>::type { - return hipThreadIdx_x + (hipBlockIdx_y * hipGridDim_x); + return hipBlockIdx_x + (hipBlockIdx_y * hipGridDim_x); } template @@ -159,7 +159,7 @@ ROCPRIM_DEVICE inline auto flat_block_id() -> typename std::enable_if<(BlockSizeY > 1 && BlockSizeZ > 1), unsigned int>::type { - return hipThreadIdx_x + (hipBlockIdx_y * hipGridDim_x) + + return hipBlockIdx_x + (hipBlockIdx_y * hipGridDim_x) + (hipBlockIdx_z * hipGridDim_y * hipGridDim_x); } diff --git a/test/rocprim/test_block_scan.cpp b/test/rocprim/test_block_scan.cpp index 1b9369143..295855504 100644 --- a/test/rocprim/test_block_scan.cpp +++ b/test/rocprim/test_block_scan.cpp @@ -244,10 +244,10 @@ struct static_run_algo } // Verifying results - test_utils::assert_near(output, expected, 0.01); + test_utils::assert_near(output, expected, test_utils::precision_threshold::percentage); if(device_output_b) { - test_utils::assert_near(output_b, expected_b, 0.01); + test_utils::assert_near(output_b, expected_b, test_utils::precision_threshold::percentage); } } }; diff --git a/test/rocprim/test_utils.hpp b/test/rocprim/test_utils.hpp index 853ff1b9f..66ab000a3 100644 --- a/test/rocprim/test_utils.hpp +++ b/test/rocprim/test_utils.hpp @@ -43,6 +43,21 @@ std::ostream& operator<<(std::ostream& stream, const rocprim::half& value) namespace test_utils { +static constexpr uint32_t random_data_generation_segments = 32; +static constexpr uint32_t random_data_generation_repeat_strides = 4; + +template +struct precision_threshold +{ + static constexpr float percentage = 0.01f; +}; + +template<> +struct precision_threshold +{ + static constexpr 
float percentage = 0.05f; +}; + // Support half operators on host side ROCPRIM_HOST inline @@ -209,7 +224,33 @@ inline auto get_random_data(size_t size, T min, T max, int seed_value) gen.seed(seed_value); std::uniform_int_distribution distribution(min, max); std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); + uint32_t segment_size = size / random_data_generation_segments; + if(segment_size != 0) + { + for(uint32_t segment_index = 0; segment_index < random_data_generation_segments; segment_index++) + { + if(segment_index % random_data_generation_repeat_strides == 0) + { + T repeated_value = distribution(gen); + std::fill( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + repeated_value); + + } + else + { + std::generate( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + [&]() { return distribution(gen); }); + } + } + } + else + { + std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); + } return data; } @@ -224,7 +265,34 @@ inline auto get_random_data(size_t size, T min, T max, int seed_value) using dis_type = typename std::conditional::value, float, T>::type; std::uniform_real_distribution distribution(min, max); std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); + uint32_t segment_size = size / random_data_generation_segments; + if(segment_size != 0) + { + for(uint32_t segment_index = 0; segment_index < random_data_generation_segments; segment_index++) + { + if(segment_index % random_data_generation_repeat_strides == 0) + { + T repeated_value = distribution(gen); + std::fill( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + repeated_value); + + } + else + { + std::generate( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + [&]() { 
return distribution(gen); }); + } + } + } + else + { + std::generate(data.begin(), data.end(), [&]() { return distribution(gen); }); + + } return data; } @@ -252,7 +320,7 @@ template inline auto get_random_value(T min, T max, int seed_value) -> typename std::enable_if::value, T>::type { - return get_random_data(1, min, max, seed_value)[0]; + return get_random_data(random_data_generation_segments, min, max, seed_value)[0]; } // Can't use std::prefix_sum for inclusive/exclusive scan, because @@ -715,7 +783,33 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: gen.seed(seed_value); std::uniform_int_distribution distribution(min, max); std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); + uint32_t segment_size = size / random_data_generation_segments; + if(segment_size != 0) + { + for(uint32_t segment_index = 0; segment_index < random_data_generation_segments; segment_index++) + { + if(segment_index % random_data_generation_repeat_strides == 0) + { + T repeated_value = T(distribution(gen), distribution(gen)); + std::fill( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + repeated_value); + + } + else + { + std::generate( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + [&]() { return T(distribution(gen), distribution(gen)); }); + } + } + } + else + { + std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); + } return data; } @@ -731,7 +825,33 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: gen.seed(seed_value); std::uniform_real_distribution distribution(min, max); std::vector data(size); - std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); + uint32_t segment_size = size / random_data_generation_segments; + if(segment_size != 0) + { 
+ for(uint32_t segment_index = 0; segment_index < random_data_generation_segments; segment_index++) + { + if(segment_index % random_data_generation_repeat_strides == 0) + { + T repeated_value = T(distribution(gen), distribution(gen)); + std::fill( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + repeated_value); + + } + else + { + std::generate( + data.begin() + segment_size * segment_index, + data.begin() + segment_size * (segment_index + 1), + [&]() { return T(distribution(gen), distribution(gen)); }); + } + } + } + else + { + std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); + } return data; } @@ -766,7 +886,7 @@ template inline auto get_random_value(typename T::value_type min, typename T::value_type max, int seed_value) -> typename std::enable_if::value || is_custom_test_array_type::value, T>::type { - return get_random_data(1, min, max, seed_value)[0]; + return get_random_data(random_data_generation_segments, min, max, seed_value)[0]; } template