From 869f2e32776f0a38c181c61e7f4deba081eff304 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Thu, 22 May 2025 15:25:35 -0400 Subject: [PATCH] Reset internal hip error for tests that run out of memory The behaviour of hipGetLastError is changing in HIP 7.0. Previously the error that was reported was cleared on each HIP API call. This means that hipGetLastError reported any error that occurred during the last HIP API call. Moving forward, the error that's reported will only be cleared on each call to hipGetLastError. This means that hipGetLastError will report any error that has occurred since the last call to hipGetLastError. Some of our tests rely on observing a return value of hipErrorOutOfMemory from hipMalloc when an allocation is too large for a given GPU architecture's memory system. This sets the internal HIP error, and it's not cleared before subsequent tests call hipGetLastError, causing them to fail. This change adds extra calls to hipGetLastError to clear the error (for future tests) in cases where tests run out of memory. 
--- projects/rocprim/common/utils_device_ptr.hpp | 1 + projects/rocprim/test/common_test_header.hpp | 1 + projects/rocprim/test/rocprim/test_device_merge_sort.cpp | 2 ++ 3 files changed, 4 insertions(+) diff --git a/projects/rocprim/common/utils_device_ptr.hpp b/projects/rocprim/common/utils_device_ptr.hpp index e0ed783f43a..b1be71b9b30 100644 --- a/projects/rocprim/common/utils_device_ptr.hpp +++ b/projects/rocprim/common/utils_device_ptr.hpp @@ -294,6 +294,7 @@ class device_ptr = common::hipMallocHelper(&device_temp_ptr, new_number_of_ele * value_size); if(err == hipErrorOutOfMemory) { + (void) hipGetLastError(); // reset internally recorded HIP error return false; } HIP_CHECK(err); diff --git a/projects/rocprim/test/common_test_header.hpp b/projects/rocprim/test/common_test_header.hpp index fe845c4e256..554160ede6a 100755 --- a/projects/rocprim/test/common_test_header.hpp +++ b/projects/rocprim/test/common_test_header.hpp @@ -50,6 +50,7 @@ hipError_t error = condition; \ if(error == hipErrorOutOfMemory) \ { \ + (void) hipGetLastError(); \ std::cout << "Out of memory. Skipping size = " << size << std::endl; \ break; \ } \ diff --git a/projects/rocprim/test/rocprim/test_device_merge_sort.cpp b/projects/rocprim/test/rocprim/test_device_merge_sort.cpp index 4b315d4a3be..d5996ecaf38 100644 --- a/projects/rocprim/test/rocprim/test_device_merge_sort.cpp +++ b/projects/rocprim/test/rocprim/test_device_merge_sort.cpp @@ -390,6 +390,7 @@ void testLargeIndices() hipError_t malloc_status = common::hipMallocHelper(&d_output, size * sizeof(*d_output)); if(malloc_status == hipErrorOutOfMemory) { + (void) hipGetLastError(); // reset internally recorded HIP error std::cout << "Out of memory. 
Skipping size = " << size << std::endl; break; } @@ -418,6 +419,7 @@ void testLargeIndices() malloc_status = common::hipMallocHelper(&d_temp_storage, temp_storage_size_bytes); if(malloc_status == hipErrorOutOfMemory) { + (void) hipGetLastError(); // reset internally recorded HIP error std::cout << "Out of memory. Skipping size = " << size << std::endl; HIP_CHECK(hipFree(d_output)); break;