From 0de3fd8bfa6c42f5e6cf9ce06c8722ec37c46778 Mon Sep 17 00:00:00 2001 From: Alan Ayala Date: Sat, 5 Jul 2025 17:13:12 +0200 Subject: [PATCH 1/2] Scope -Xarch_device -O3 to Avoid RTC Cache Errors on Windows --- clients/tests/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clients/tests/CMakeLists.txt b/clients/tests/CMakeLists.txt index cfec9a283dc..3d27e9f846e 100644 --- a/clients/tests/CMakeLists.txt +++ b/clients/tests/CMakeLists.txt @@ -98,7 +98,11 @@ add_executable( rtc_helper_crash rtc_helper_crash.cpp ) # contains device code for callback functions, so ensure the device # code is built with the same optimization level to minimize chance # of a mismatch -target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 ) +# Adding -Xarch_device -O3 causes hash or ABI mismatches on Windows, +# leading to errors when kernels are reused from the RTC cache. +if( NOT WIN32 ) + target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 ) +endif( ) find_package( Boost REQUIRED ) set( Boost_DEBUG ON ) From 535be48daa45e3dcda97aebc27ee718ebcc9c3cf Mon Sep 17 00:00:00 2001 From: Alan Ayala Date: Thu, 10 Jul 2025 21:18:58 +0200 Subject: [PATCH 2/2] Intermitent solution --- clients/tests/CMakeLists.txt | 6 +-- clients/tests/unit_test.cpp | 64 ++++++++++++++++++++------ library/src/include/rocfft_ostream.hpp | 9 +++- library/src/rocfft_ostream.cpp | 60 +++++++++++++++++++----- 4 files changed, 109 insertions(+), 30 deletions(-) diff --git a/clients/tests/CMakeLists.txt b/clients/tests/CMakeLists.txt index 3d27e9f846e..a00aaf9dc39 100644 --- a/clients/tests/CMakeLists.txt +++ b/clients/tests/CMakeLists.txt @@ -98,11 +98,7 @@ add_executable( rtc_helper_crash rtc_helper_crash.cpp ) # contains device code for callback functions, so ensure the device # code is built with the same optimization level to minimize chance # of a mismatch -# Adding -Xarch_device -O3 causes hash or ABI mismatches on Windows, -# leading to errors when kernels are reused from the RTC cache. -if( NOT WIN32 ) - target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 ) -endif( ) +target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 -mcumode ) find_package( Boost REQUIRED ) set( Boost_DEBUG ON ) diff --git a/clients/tests/unit_test.cpp b/clients/tests/unit_test.cpp index 141a8347bfb..940b59d9229 100644 --- a/clients/tests/unit_test.cpp +++ b/clients/tests/unit_test.cpp @@ -210,7 +210,7 @@ TEST(rocfft_UnitTest, log_levels) #ifdef WIN32 static const char* log_output = "NUL"; #else - static const char* log_output = "/dev/null"; + static const char* log_output = "/dev/null"; #endif EnvironmentSetTemp log_trace_path("ROCFFT_LOG_TRACE_PATH", log_output); EnvironmentSetTemp log_bench_path("ROCFFT_LOG_BENCH_PATH", log_output); @@ -310,7 +310,8 @@ TEST(rocfft_UnitTest, log_multithreading) rocfft_cleanup(); - // now verify that the trace log has one message per line, with nothing garbled + // now verify that the trace log has one message per line, with nothing + // garbled std::ifstream trace_log(TRACE_FILE); std::string line; std::regex validator("^rocfft_(setup|cleanup|plan_description_(create|destroy)," @@ -417,7 +418,26 @@ TEST(rocfft_UnitTest, workmem_null) workmem_test([](size_t requested) { return requested; }, rocfft_status_success, true); } -static const size_t RTC_PROBLEM_SIZE = 2304; +bool wait_for_nonempty_log(const std::string& log_path, int timeout_ms) +{ + const int poll_interval_ms = 50; + int waited = 0; + + int count = 0; + while(waited < timeout_ms) + { + std::cout << "try " << count << " to find " << log_path << std::endl; + if(std::filesystem::exists(log_path) && std::filesystem::file_size(log_path) > 0) + return true; + + std::this_thread::sleep_for(std::chrono::milliseconds(poll_interval_ms)); + waited += poll_interval_ms; + } + + return false; +} + +static const size_t RTC_PROBLEM_SIZE = 2304 + rand() % 700; // runtime compilation cache tests TEST(rocfft_UnitTest, rtc_cache) { @@ -451,6 +471,8 @@ TEST(rocfft_UnitTest, rtc_cache) }; rocfft_cleanup(); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + EnvironmentSetTemp cache_env("ROCFFT_RTC_CACHE_PATH", rtc_cache_path.c_str()); EnvironmentSetTemp layer_env("ROCFFT_LAYER", "32"); EnvironmentSetTemp log_env("ROCFFT_LOG_RTC_PATH", rtc_log_path.c_str()); @@ -481,17 +503,14 @@ TEST(rocfft_UnitTest, rtc_cache) }; // check the RTC log to see if an FFT kernel got compiled auto fft_kernel_was_compiled = [&]() { - // HACK: logging is done in a worker thread, so sleep for a - // bit to give it a chance to actually write. It at least - // should flush after writing. - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - // look for a ROCFFT_RTC_BEGIN line that indicates RTC happened + if(!wait_for_nonempty_log(rtc_log_path, 2000)) // wait up to 2s + return false; + std::ifstream logfile(rtc_log_path); std::string line; while(std::getline(logfile, line)) { - if(line.find("ROCFFT_RTC_BEGIN") != std::string::npos - && line.find("fft_") != std::string::npos) + if(line.find("ROCFFT_RTC_BEGIN") != std::string::npos) return true; } return false; @@ -502,7 +521,23 @@ TEST(rocfft_UnitTest, rtc_cache) build_plan(); ASSERT_EQ(rocfft_cache_serialize(&onekernel_cache, &onekernel_cache_bytes), rocfft_status_success); + + std::ifstream log_file_test(rtc_log_path); + std::cout << "log file " << rtc_log_path << " exists ?: " << std::boolalpha + << log_file_test.good() << std::endl; + rocfft_cleanup(); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + int linecount = 0; + std::string line; + + while(std::getline(log_file_test, line) && linecount < 5) + { + std::cout << line << std::endl; + ++linecount; + } + ASSERT_TRUE(fft_kernel_was_compiled()); // serialized cache should be bigger than empty cache @@ -526,6 +561,8 @@ TEST(rocfft_UnitTest, rtc_cache) rocfft_setup(); build_plan(); rocfft_cleanup(); + std::this_thread::sleep_for(std::chrono::milliseconds(10000)); + ASSERT_FALSE(fft_kernel_was_compiled()); // blow away cache again, deserialize one-kernel cache. re-init @@ -565,6 +602,7 @@ TEST(rocfft_UnitTest, rtc_cache) build_plan(); rocfft_cleanup(); ASSERT_TRUE(fft_kernel_was_compiled()); + remove(rtc_cache_path.c_str()); } // make sure cache API functions tolerate null pointers without crashing @@ -588,8 +626,8 @@ TEST(rocfft_UnitTest, rtc_helper_crash) fs::path test_exe = filename; fs::path crasher_exe = test_exe.replace_filename("rtc_helper_crash.exe"); #else - fs::path test_exe = program_invocation_name; - fs::path crasher_exe = test_exe.replace_filename("rtc_helper_crash"); + fs::path test_exe = program_invocation_name; + fs::path crasher_exe = test_exe.replace_filename("rtc_helper_crash"); #endif // use the crashing helper @@ -790,4 +828,4 @@ TEST(rocfft_UnitTest, rtc_test_harness) // status is what matters for this test } } -} +} \ No newline at end of file diff --git a/library/src/include/rocfft_ostream.hpp b/library/src/include/rocfft_ostream.hpp index 65f266f443e..0d8329bb496 100644 --- a/library/src/include/rocfft_ostream.hpp +++ b/library/src/include/rocfft_ostream.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2016 - 2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,6 +21,10 @@ #ifndef _ROCFFT_OSTREAM_HPP_ #define _ROCFFT_OSTREAM_HPP_ +#ifdef WIN32 +#include +#endif + #include "../../../shared/rocfft_complex.h" #include "rocfft/rocfft.h" #include @@ -133,6 +137,9 @@ class rocfft_ostream // Mutex for this thread's queue std::mutex mutex; + // File handler + HANDLE handle = nullptr; + // Queue of tasks std::queue queue; diff --git a/library/src/rocfft_ostream.cpp b/library/src/rocfft_ostream.cpp index d48c4e93b54..25d43130698 100644 --- a/library/src/rocfft_ostream.cpp +++ b/library/src/rocfft_ostream.cpp @@ -1,5 +1,5 @@ /****************************************************************************** -* Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. +* Copyright (C) 2016 - 2025 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -28,9 +28,6 @@ static int rocfft_abort_once(); #include #include #include -#ifdef WIN32 -#include -#endif // static data std::unique_ptr rocfft_ostream::worker_map; @@ -350,18 +347,36 @@ void rocfft_ostream::worker::thread_function() break; } - // Write the data - fwrite(task.data(), 1, task.size(), file); +// Write the data +#ifdef WIN32 + DWORD bytesWritten = 0; + BOOL success = WriteFile( + handle, task.data(), static_cast(task.size()), &bytesWritten, nullptr); + + if(!success || bytesWritten != task.size()) + { + perror("WriteFile() failed"); + task.set_value(); + break; + } - // Detect any error and flush the C FILE stream + // Flush buffer to disk, like fflush() +if(!FlushFileBuffers(handle)) +{ + perror("FlushFileBuffers() failed"); + task.set_value(); + break; +} + +#else + fwrite(task.data(), 1, task.size(), file); if(ferror(file) || fflush(file)) { perror("Error writing log file"); - - // Tell future to wake up task.set_value(); break; } +#endif // Promise that the data has been written task.set_value(); @@ -381,12 +396,30 @@ rocfft_ostream::worker::worker(int fd) fd = fcntl(fd, F_DUPFD_CLOEXEC, 0); #endif - // If the dup fails or fdopen fails, print error and abort +// If the dup fails or fdopen fails, print error and abort +#ifdef WIN32 + fd = _dup(fd); + if(fd == -1) + { + perror("_dup() failed"); + rocfft_abort(); + } + + // Get raw Windows HANDLE + handle = (HANDLE)_get_osfhandle(fd); + if(handle == INVALID_HANDLE_VALUE) + { + perror("_get_osfhandle() failed"); + rocfft_abort(); + } +#else + fd = fcntl(fd, F_DUPFD_CLOEXEC, 0); if(fd == -1 || !(file = FDOPEN(fd, "a"))) { perror("fdopen() error"); rocfft_abort(); } +#endif // Create a worker thread, capturing *this thread = std::thread([=] { thread_function(); }); @@ -401,8 +434,13 @@ rocfft_ostream::worker::~worker() send({}); // Close the FILE +#ifdef WIN32 + if(handle) + CloseHandle(handle); +#else if(file) fclose(file); +#endif } // output of rocfft-specific types @@ -493,4 +531,4 @@ rocfft_ostream& operator<<(rocfft_ostream& os, std::pair } os << "]"; return os; -} +} \ No newline at end of file