Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec
* Added initial value support for warp- and block-level inclusive scan.
* Added support for building tests with device-side random data generation, making them finish faster. This requires rocRAND, and is enabled with the `WITH_ROCRAND=ON` build flag.
* Added additional unit tests for `test_block_scan.hpp`
* Added additional unit tests for `test_block_sort.hpp`

### Changed

Expand Down
349 changes: 348 additions & 1 deletion test/rocprim/test_block_sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,287 @@ void TestSortStableKey(std::vector<size_t> sizes)
}
}
}

template<unsigned int block_size,
unsigned int items_per_thread,
class key_type,
class value_type,
rocprim::block_sort_algorithm algo,
class binary_op_type>
void TestSortKeyNoSize()
{
int device_id = test_common_utils::obtain_device_from_ctest();
SCOPED_TRACE(testing::Message() << "with device_id = " << device_id);
HIP_CHECK(hipSetDevice(device_id));

static constexpr const unsigned int items_per_block = block_size * items_per_thread;
hipStream_t stream = 0; // default

if(!is_buildable(block_size, items_per_thread, algo))
{
GTEST_SKIP();
}

for(size_t seed_index = 0; seed_index < number_of_runs; seed_index++)
{
unsigned int seed_value
= seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count];
SCOPED_TRACE(testing::Message() << "with seed = " << seed_value);

size_t grid_size = 1134;
size_t size = items_per_block * grid_size;
SCOPED_TRACE(testing::Message() << "with size = " << size);
// Generate data
std::vector<key_type> output
= test_utils::get_random_data_wrapped<key_type>(size, -100, 100, seed_value);

// Calculate expected results on host
std::vector<key_type> expected(output);
binary_op_type binary_op;
for(size_t i = 0; i < grid_size; i++)
{
std::sort(expected.begin() + (i * items_per_block),
expected.begin() + std::min(size, ((i + 1) * items_per_block)),
binary_op);
}

// Preparing device
common::device_ptr<key_type> device_key_output(output);

hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_keys_kernel_no_size<block_size,
items_per_thread,
key_type*,
algo,
binary_op_type>),
dim3(grid_size),
dim3(block_size),
0,
stream,
device_key_output.get()
);

// Reading results back
output = device_key_output.load();

test_utils::assert_eq(output, expected);
}
}

template<unsigned int block_size,
unsigned int items_per_thread,
class key_type,
class value_type,
rocprim::block_sort_algorithm algo,
class binary_op_type>
void TestSortKeyValueNoSize()
{
int device_id = test_common_utils::obtain_device_from_ctest();
SCOPED_TRACE(testing::Message() << "with device_id = " << device_id);
HIP_CHECK(hipSetDevice(device_id));

static constexpr const unsigned int items_per_block = block_size * items_per_thread;
static constexpr const size_t grid_size = 1134;
static constexpr const size_t size = items_per_block * grid_size;
hipStream_t stream = 0; // default

if(!is_buildable(block_size, items_per_thread, algo))
{
GTEST_SKIP();
}

for(size_t seed_index = 0; seed_index < number_of_runs; seed_index++)
{
unsigned int seed_value
= seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count];
SCOPED_TRACE(testing::Message() << "with seed = " << seed_value);
SCOPED_TRACE(testing::Message() << "with size = " << size);

// Generate data
std::vector<key_type> output_key
= test_utils::get_random_data_wrapped<key_type>(size, 0, 100, seed_value);
std::vector<value_type> output_value
= test_utils::get_random_data_wrapped<value_type>(size, -100, 100, seed_value);

// Combine vectors to form pairs with key and value
std::vector<std::pair<key_type, value_type>> target(size);
for(unsigned i = 0; i < target.size(); i++)
target[i] = std::make_pair(output_key[i], output_value[i]);

// Calculate expected results on host
using key_value = std::pair<key_type, value_type>;
std::vector<key_value> expected(target);
constexpr bool descending = !std::is_same<binary_op_type, rocprim::less<key_type>>::value;
for(size_t i = 0; i < expected.size() / items_per_block; i++)
{
std::sort(expected.begin() + (i * items_per_block),
expected.begin() + ((i + 1) * items_per_block),
test_utils::key_value_comparator<key_type,
value_type,
descending,
0,
sizeof(key_type) * 8>());
}

// Preparing device
common::device_ptr<key_type> device_key_output(output_key);
common::device_ptr<value_type> device_value_output(output_value);

// Running kernel, ignored if invalid size
hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_pairs_kernel_no_size<block_size,
items_per_thread,
key_type,
value_type,
algo,
binary_op_type>),
dim3(grid_size),
dim3(block_size),
0,
stream,
device_key_output.get(),
device_value_output.get());
HIP_CHECK(hipGetLastError());

// Reading results back
output_key = device_key_output.load();
output_value = device_value_output.load();

std::vector<key_type> expected_key(expected.size());
std::vector<value_type> expected_value(expected.size());
for(size_t i = 0; i < expected.size(); i++)
{
expected_key[i] = expected[i].first;
expected_value[i] = expected[i].second;
}

// Keys are sorted, Values order not guaranteed
// Sort subsets where key was the same to make sure all values are still present
using value_op_type = rocprim::less<value_type>;
using eq_op_type = rocprim::equal_to<key_type>;
value_op_type value_op;
eq_op_type eq_op;
for(size_t i = 0; i < output_key.size();)
{
auto j = i;
for(; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j)
{}
std::sort(output_value.begin() + i, output_value.begin() + j, value_op);
std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op);
i = j;
}

test_utils::assert_eq(output_key, expected_key);
test_utils::assert_eq(output_value, expected_value);
}
}

template<unsigned int block_size,
unsigned int items_per_thread,
class key_type,
class value_type,
rocprim::block_sort_algorithm algo,
class binary_op_type>
void TestSortKeyValueWithSize()
{
int device_id = test_common_utils::obtain_device_from_ctest();
SCOPED_TRACE(testing::Message() << "with device_id = " << device_id);
HIP_CHECK(hipSetDevice(device_id));

static constexpr const unsigned int items_per_block = block_size * items_per_thread;
static constexpr const size_t grid_size = 1134;
static constexpr const size_t size = items_per_block * grid_size;
hipStream_t stream = 0; // default

if(!is_buildable(block_size, items_per_thread, algo))
{
GTEST_SKIP();
}

for(size_t seed_index = 0; seed_index < number_of_runs; seed_index++)
{
unsigned int seed_value
= seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count];
SCOPED_TRACE(testing::Message() << "with seed = " << seed_value);
SCOPED_TRACE(testing::Message() << "with size = " << size);

// Generate data
std::vector<key_type> output_key
= test_utils::get_random_data_wrapped<key_type>(size, 0, 100, seed_value);
std::vector<value_type> output_value
= test_utils::get_random_data_wrapped<value_type>(size, -100, 100, seed_value);

// Combine vectors to form pairs with key and value
std::vector<std::pair<key_type, value_type>> target(size);
for(unsigned i = 0; i < target.size(); i++)
target[i] = std::make_pair(output_key[i], output_value[i]);

// Calculate expected results on host
using key_value = std::pair<key_type, value_type>;
std::vector<key_value> expected(target);
constexpr bool descending = !std::is_same<binary_op_type, rocprim::less<key_type>>::value;
for(size_t i = 0; i < expected.size() / items_per_block; i++)
{
std::sort(expected.begin() + (i * items_per_block),
expected.begin() + ((i + 1) * items_per_block),
test_utils::key_value_comparator<key_type,
value_type,
descending,
0,
sizeof(key_type) * 8>());
}

// Preparing device
common::device_ptr<key_type> device_key_output(output_key);
common::device_ptr<value_type> device_value_output(output_value);

// Running kernel, ignored if invalid size
hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_pairs_kernel_with_size<block_size,
items_per_thread,
key_type,
value_type,
algo,
binary_op_type>),
dim3(grid_size),
dim3(block_size),
0,
stream,
device_key_output.get(),
device_value_output.get(),
items_per_block);
HIP_CHECK(hipGetLastError());

// Reading results back
output_key = device_key_output.load();
output_value = device_value_output.load();

std::vector<key_type> expected_key(expected.size());
std::vector<value_type> expected_value(expected.size());
for(size_t i = 0; i < expected.size(); i++)
{
expected_key[i] = expected[i].first;
expected_value[i] = expected[i].second;
}

// Keys are sorted, Values order not guaranteed
// Sort subsets where key was the same to make sure all values are still present
using value_op_type = rocprim::less<value_type>;
using eq_op_type = rocprim::equal_to<key_type>;
value_op_type value_op;
eq_op_type eq_op;
for(size_t i = 0; i < output_key.size();)
{
auto j = i;
for(; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j)
{}
std::sort(output_value.begin() + i, output_value.begin() + j, value_op);
std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op);
i = j;
}

test_utils::assert_eq(output_key, expected_key);
test_utils::assert_eq(output_value, expected_value);
}
}

#endif // TEST_ROCPRIM_TEST_BLOCK_SORT_HPP_
// This file is included multiple times in the test_block_sort_[algo].cpp file, because
// the test definitions below this header guard need to be compiled for each test suites:
Expand Down Expand Up @@ -429,7 +710,7 @@ typed_test_def(suite_name,
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 4;
std::vector<size_t> sizes
= {0, 53, 512, 5000, 34567, (1 << 17) - 1220, 1134 * 256, (1 << 20) - 123};
= {0,53, 512, 5000, 34567, (1 << 17) - 1220, 1134 * 256, (1 << 20) - 123};
TestSortKey<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>(sizes);
}

Expand All @@ -443,3 +724,69 @@ typed_test_def(suite_name, name_suffix, SortKeyValueDesc)
static constexpr const unsigned int items_per_thread = 1;
TestSortKeyValue<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyNoSize)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::greater<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 1;
TestSortKeyNoSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyNoSizeMultipleItemsPerThread)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::greater<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 4;
TestSortKeyNoSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyValueNoSize)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::less<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 1;
TestSortKeyValueNoSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyValueNoSizeMultipleItemsPerThread)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::less<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 4;
TestSortKeyValueNoSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyValueWithSize)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::less<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 1;
TestSortKeyValueWithSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}

typed_test_def(suite_name, name_suffix, SortKeyValueWithSizeMultipleItemsPerThread)
{
using key_type = typename TestFixture::key_type;
using value_type = typename TestFixture::value_type;
using binary_op_type = typename rocprim::less<key_type>;
static constexpr const rocprim::block_sort_algorithm algo = TEST_BLOCK_SORT_ALGORITHM;
static constexpr const unsigned int block_size = TestFixture::block_size;
static constexpr const unsigned int items_per_thread = 4;
TestSortKeyValueWithSize<block_size, items_per_thread, key_type, value_type, algo, binary_op_type>();
}
Loading