Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d157306
ported over fixes
NguyenNhuDi Aug 6, 2025
d5d5113
added back missing newlines
NguyenNhuDi Aug 6, 2025
9a47b5e
disabled test hipcub caching device when using valgrind
NguyenNhuDi Aug 6, 2025
44a3558
changed from using valgrind to cstdlib
NguyenNhuDi Aug 6, 2025
045c5ba
switched back to using valgrind header, but with a check for header now
NguyenNhuDi Aug 7, 2025
4fe9925
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Aug 20, 2025
bedd6ee
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Aug 20, 2025
dd01241
resolved merge conflict
NguyenNhuDi Aug 22, 2025
e629044
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Aug 22, 2025
e8274f2
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Aug 27, 2025
a0f2456
resolved git conflict
NguyenNhuDi Aug 28, 2025
e13f0eb
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Aug 28, 2025
a103d98
updated copyright
NguyenNhuDi Aug 28, 2025
23a5ab6
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
stanleytsang-amd Sep 3, 2025
2c6b2eb
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Sep 18, 2025
1c25d58
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Sep 26, 2025
ce305a1
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Sep 26, 2025
c7c9938
Merge branch 'develop' into users/NguyenNhuDi/hipcub-fix-mem-leaks
NguyenNhuDi Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -665,4 +665,4 @@ TYPED_TEST(HipcubBlockRadixSort, SortKeysValues)
HIP_CHECK(hipFree(device_keys_output));
HIP_CHECK(hipFree(device_values_output));
}
}
}
6 changes: 6 additions & 0 deletions projects/hipcub/test/hipcub/test_hipcub_block_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2575,6 +2575,9 @@ TYPED_TEST(HipcubBlockScanInputArrayTests, ExclusiveScanReduce)
test_utils::assert_near(output_reductions,
expected_reductions,
test_utils::precision<T>::value * block_size);

HIP_CHECK(hipFree(device_output));
HIP_CHECK(hipFree(device_output_reductions));
}
}

Expand Down Expand Up @@ -3416,6 +3419,9 @@ TYPED_TEST(HipcubBlockScanInputArrayTests, ExclusiveSumReduce)
test_utils::assert_near(output_reductions,
expected_reductions,
test_utils::precision<T>::value * block_size);

HIP_CHECK(hipFree(device_output));
HIP_CHECK(hipFree(device_output_reductions));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
* Modifications Copyright (c) 2019-2025, Advanced Micro Devices, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -29,8 +29,17 @@

#include "common_test_header.hpp"

#include "hipcub/util_allocator.hpp"
#include <cstdlib>
#include <hipcub/util_allocator.hpp>

#if __has_include(<valgrind/valgrind.h>)
#include <valgrind/valgrind.h>
#define HAS_VALGRIND_H 1
#else
#define HAS_VALGRIND_H 0
#endif

__global__
void EmptyKernel()
{}
Expand All @@ -39,6 +48,16 @@ void EmptyKernel()

TEST(HipcubCachingDeviceAllocatorTests, Test1)
{

#if HAS_VALGRIND_H
// This test is very timing sensitive. Valgrind significantly slows down
// kernel execution and therefore messes up the timing of the test.
// If valgrind is being used we should disable this test otherwise it will fail
if (RUNNING_ON_VALGRIND) {
GTEST_SKIP() << "Skipping test under Valgrind";
}
#endif //HAS_VALGRIND_H

// Get number of GPUs and current GPU
int num_gpus;
int initial_gpu;
Expand Down
19 changes: 16 additions & 3 deletions projects/hipcub/test/hipcub/test_hipcub_device_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,14 @@ TYPED_TEST(DeviceBatchCopyTests, SizeAndTypeVariation)
if(i < num_tlev_buffers)
{
size = test_utils::get_random_value<buffer_size_type>(1, wlev_min_size - 1, rng());
} else if(i < num_tlev_buffers + num_wlev_buffers)
}
else if(i < num_tlev_buffers + num_wlev_buffers)
{
size = test_utils::get_random_value<buffer_size_type>(wlev_min_size,
blev_min_size - 1,
rng());
} else
}
else
{
size = test_utils::get_random_value<buffer_size_type>(blev_min_size, max_size, rng());
}
Expand Down Expand Up @@ -256,7 +258,8 @@ TYPED_TEST(DeviceBatchCopyTests, SizeAndTypeVariation)
{
src_offsets = shuffled_exclusive_scan<buffer_offset_type>(h_buffer_num_elements, rng);
dst_offsets = shuffled_exclusive_scan<buffer_offset_type>(h_buffer_num_elements, rng);
} else
}
else
{
src_offsets = std::vector<buffer_offset_type>(num_buffers);
dst_offsets = std::vector<buffer_offset_type>(num_buffers);
Expand Down Expand Up @@ -320,4 +323,14 @@ TYPED_TEST(DeviceBatchCopyTests, SizeAndTypeVariation)
ASSERT_TRUE(test_utils::bit_equal(h_input[input_index], h_output[output_index]));
}
}

// De-allocate memory.
HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_output));

HIP_CHECK(hipFree(d_buffer_srcs));
HIP_CHECK(hipFree(d_buffer_dsts));
HIP_CHECK(hipFree(d_buffer_sizes));

HIP_CHECK(hipFree(d_temp_storage));
}
9 changes: 9 additions & 0 deletions projects/hipcub/test/hipcub/test_hipcub_device_for.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ TEST(HipcubDeviceForTests, ForEachTempStore)

HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_temp_storage));
HIP_CHECK(hipFree(d_count));
}
}
}
Expand Down Expand Up @@ -493,6 +494,7 @@ TYPED_TEST(HipcubDeviceForTests, ForEachCopy)
}

HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_count));
}
}

Expand Down Expand Up @@ -577,6 +579,7 @@ TEST(HipcubDeviceForTests, ForEachCopyTempStore)

HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_temp_storage));
HIP_CHECK(hipFree(d_count));
}
}
}
Expand Down Expand Up @@ -655,6 +658,7 @@ TYPED_TEST(HipcubDeviceForTests, ForEachCopyN)
}

HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_count));
}
}

Expand Down Expand Up @@ -709,6 +713,8 @@ TEST(HipcubDeviceForTests, ForCountingIterator)

// Check if have same number of odd numbers
ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(h_count, expected));

HIP_CHECK(hipFree(d_count));
}
}
}
Expand Down Expand Up @@ -761,6 +767,8 @@ TEST(HipcubDeviceForTests, ForCopyCountingIterator)

// Check if have same number of odd numbers
ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(h_count, expected));

HIP_CHECK(hipFree(d_count));
}
}
}
Expand Down Expand Up @@ -840,6 +848,7 @@ TEST(HipcubDeviceForTests, ForEachCopyNTempStore)

HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_temp_storage));
HIP_CHECK(hipFree(d_count));
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions projects/hipcub/test/hipcub/test_hipcub_device_memcpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,4 +327,14 @@ TYPED_TEST(DeviceBatchMemcpyTests, SizeAndTypeVariation)
ASSERT_TRUE(test_utils::bit_equal(h_input[input_index], h_output[output_index]));
}
}

// De-allocate memory.
HIP_CHECK(hipFree(d_input));
HIP_CHECK(hipFree(d_output));

HIP_CHECK(hipFree(d_buffer_srcs));
HIP_CHECK(hipFree(d_buffer_dsts));
HIP_CHECK(hipFree(d_buffer_sizes));

HIP_CHECK(hipFree(d_temp_storage));
}
121 changes: 75 additions & 46 deletions projects/hipcub/test/hipcub/test_hipcub_device_spmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,59 +70,53 @@ using HipcubDeviceSpmvTestsParams = ::testing::Types<DeviceSpmvParams<float, 4,
DeviceSpmvParams<float, 4, 0, 0, 0, true>>;

template<typename T, typename OffsetType>
static void
generate_matrix(CooMatrix<T, OffsetType> &coo_matrix,
int32_t grid2d,
int32_t grid3d,
int32_t wheel,
int32_t dense)
static void generate_matrix(CooMatrix<T, OffsetType>& coo_matrix,
int32_t grid2d,
int32_t grid3d,
int32_t wheel,
int32_t dense)
{
if (grid2d > 0)
if(grid2d > 0)
{
// Generate 2D lattice
coo_matrix.InitGrid2d(grid2d, false);
}
else if (grid3d > 0)
else if(grid3d > 0)
{
// Generate 3D lattice
coo_matrix.InitGrid3d(grid3d, false);
}
else if (wheel > 0)
else if(wheel > 0)
{
// Generate wheel graph
coo_matrix.InitWheel(wheel);
}
else if (dense > 0)
else if(dense > 0)
{
#if 0
#if 0
// Generate dense graph
OffsetType size = 1 << 24; // 16M nnz
args.GetCmdLineArgument("size", size);

OffsetType rows = size / dense;
printf("dense_%d_x_%d, ", rows, dense); fflush(stdout);
coo_matrix.InitDense(rows, dense);
#endif
#endif
}
}

template <
typename T,
typename OffsetType>
void SpmvGold(
CsrMatrix<T, OffsetType>& a,
const T* vector_x,
const T* vector_y_in,
T* vector_y_out,
T alpha,
T beta)
template<typename T, typename OffsetType>
void SpmvGold(CsrMatrix<T, OffsetType>& a,
const T* vector_x,
const T* vector_y_in,
T* vector_y_out,
T alpha,
T beta)
{
for (OffsetType row = 0; row < a.num_rows; ++row)
for(OffsetType row = 0; row < a.num_rows; ++row)
{
T partial = beta * vector_y_in[row];
for (OffsetType offset = a.row_offsets[row];
offset < a.row_offsets[row + 1];
++offset)
for(OffsetType offset = a.row_offsets[row]; offset < a.row_offsets[row + 1]; ++offset)
{
partial += alpha * a.values[offset] * vector_x[a.column_indices[offset]];
}
Expand All @@ -138,8 +132,8 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
SCOPED_TRACE(testing::Message() << "with device_id= " << device_id);
HIP_CHECK(hipSetDevice(device_id));

using T = typename TestFixture::value_type;
using OffsetType = int32_t;
using T = typename TestFixture::value_type;
using OffsetType = int32_t;
constexpr int32_t grid_2d = TestFixture::grid_2d;
constexpr int32_t grid_3d = TestFixture::grid_3d;
constexpr int32_t wheel = TestFixture::wheel;
Expand All @@ -160,14 +154,14 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
csr_matrix.FromCoo(coo_matrix);

// Allocate input and output vectors
T* vector_x = new T[csr_matrix.num_cols];
T* vector_y_in = new T[csr_matrix.num_rows];
T* vector_y_out = new T[csr_matrix.num_rows];
T* vector_x = new T[csr_matrix.num_cols];
T* vector_y_in = new T[csr_matrix.num_rows];
T* vector_y_out = new T[csr_matrix.num_rows];

for (int col = 0; col < csr_matrix.num_cols; ++col)
for(int col = 0; col < csr_matrix.num_cols; ++col)
vector_x[col] = 1.0;

for (int row = 0; row < csr_matrix.num_rows; ++row)
for(int row = 0; row < csr_matrix.num_rows; ++row)
vector_y_in[row] = 1.0;

// Compute reference answer
Expand All @@ -178,23 +172,43 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
hipcub::DeviceSpmv::SpmvParams<T, OffsetType> params{};
HIPCUB_CLANG_SUPPRESS_DEPRECATED_POP

HIP_CHECK(g_allocator.DeviceAllocate((void **) &params.d_values, sizeof(T) * csr_matrix.num_nonzeros));
HIP_CHECK(g_allocator.DeviceAllocate((void **) &params.d_row_end_offsets, sizeof(OffsetType) * (csr_matrix.num_rows + 1)));
HIP_CHECK(g_allocator.DeviceAllocate((void **) &params.d_column_indices, sizeof(OffsetType) * csr_matrix.num_nonzeros));
HIP_CHECK(g_allocator.DeviceAllocate((void **) &params.d_vector_x, sizeof(T) * csr_matrix.num_cols));
HIP_CHECK(g_allocator.DeviceAllocate((void **) &params.d_vector_y, sizeof(T) * csr_matrix.num_rows));
HIP_CHECK(
g_allocator.DeviceAllocate((void**)&params.d_values, sizeof(T) * csr_matrix.num_nonzeros));
HIP_CHECK(g_allocator.DeviceAllocate((void**)&params.d_row_end_offsets,
sizeof(OffsetType) * (csr_matrix.num_rows + 1)));
HIP_CHECK(g_allocator.DeviceAllocate((void**)&params.d_column_indices,
sizeof(OffsetType) * csr_matrix.num_nonzeros));
HIP_CHECK(
g_allocator.DeviceAllocate((void**)&params.d_vector_x, sizeof(T) * csr_matrix.num_cols));
HIP_CHECK(
g_allocator.DeviceAllocate((void**)&params.d_vector_y, sizeof(T) * csr_matrix.num_rows));

params.num_rows = csr_matrix.num_rows;
params.num_cols = csr_matrix.num_cols;
params.num_nonzeros = csr_matrix.num_nonzeros;
params.alpha = alpha_const;
params.beta = beta_const;

HIP_CHECK(hipMemcpy(params.d_values, csr_matrix.values, sizeof(T) * csr_matrix.num_nonzeros, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_row_end_offsets, csr_matrix.row_offsets, sizeof(OffsetType) * (csr_matrix.num_rows + 1), hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_column_indices, csr_matrix.column_indices, sizeof(OffsetType) * csr_matrix.num_nonzeros, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_vector_x, vector_x, sizeof(T) * csr_matrix.num_cols, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_vector_y, vector_y_in, sizeof(T) * csr_matrix.num_rows, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_values,
csr_matrix.values,
sizeof(T) * csr_matrix.num_nonzeros,
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_row_end_offsets,
csr_matrix.row_offsets,
sizeof(OffsetType) * (csr_matrix.num_rows + 1),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_column_indices,
csr_matrix.column_indices,
sizeof(OffsetType) * csr_matrix.num_nonzeros,
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_vector_x,
vector_x,
sizeof(T) * csr_matrix.num_cols,
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(params.d_vector_y,
vector_y_in,
sizeof(T) * csr_matrix.num_rows,
hipMemcpyHostToDevice));

// Allocate temporary storage
size_t temp_storage_bytes = 0;
Expand All @@ -221,7 +235,7 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
HIP_CHECK(hipDeviceSynchronize());

test_utils::GraphHelper gHelper;
if (TestFixture::use_graphs)
if(TestFixture::use_graphs)
gHelper.startStreamCapture(stream);

HIPCUB_CLANG_SUPPRESS_DEPRECATED_PUSH
Expand All @@ -238,10 +252,13 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
stream));
HIPCUB_CLANG_SUPPRESS_DEPRECATED_POP

if (TestFixture::use_graphs)
if(TestFixture::use_graphs)
gHelper.createAndLaunchGraph(stream);

HIP_CHECK(hipMemcpy(vector_y_in, params.d_vector_y, sizeof(T) * params.num_rows, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(vector_y_in,
params.d_vector_y,
sizeof(T) * params.num_rows,
hipMemcpyDeviceToHost));

HIP_CHECK(hipPeekAtLastError());
HIP_CHECK(hipDeviceSynchronize());
Expand All @@ -260,4 +277,16 @@ TYPED_TEST(HipcubDeviceSpmvTests, Spmv)
gHelper.cleanupGraphHelper();
HIP_CHECK(hipStreamDestroy(stream));
}

// De-allocate input and output vectors
delete[] vector_x;
delete[] vector_y_in;
delete[] vector_y_out;

HIP_CHECK(g_allocator.DeviceFree(params.d_values));
HIP_CHECK(g_allocator.DeviceFree(params.d_row_end_offsets));
HIP_CHECK(g_allocator.DeviceFree(params.d_column_indices));
HIP_CHECK(g_allocator.DeviceFree(params.d_vector_x));
HIP_CHECK(g_allocator.DeviceFree(params.d_vector_y));
HIP_CHECK(g_allocator.DeviceFree(d_temp_storage));
}
Loading