From 8114f174f82764a3e238909f50ea50b265d0d609 Mon Sep 17 00:00:00 2001 From: Roger Sanders Date: Tue, 14 Oct 2025 09:53:34 +1100 Subject: [PATCH] ld: Added fallback thread synchronisation for libc++ workaround on Windows --- ares/md/mcd/mcd.hpp | 9 +++++ ares/md/mcd/megald.cpp | 85 +++++++++++++++++++++++++++++++++++++++++ ares/md/md.hpp | 8 ++++ ares/pce/pcd/ldrom2.cpp | 85 +++++++++++++++++++++++++++++++++++++++++ ares/pce/pcd/pcd.hpp | 9 +++++ ares/pce/pce.hpp | 8 ++++ 6 files changed, 204 insertions(+) diff --git a/ares/md/mcd/mcd.hpp b/ares/md/mcd/mcd.hpp index b425da6f9..48af7380a 100644 --- a/ares/md/mcd/mcd.hpp +++ b/ares/md/mcd/mcd.hpp @@ -522,6 +522,15 @@ struct MCD : M68000, Thread { std::atomic_flag videoFramePrefetchThreadStarted; std::atomic_flag videoFramePrefetchThreadShutdownRequested; std::atomic_flag videoFramePrefetchThreadShutdownComplete; +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + // Workaround for bad performance on Windows targets under MSYS2 with libc++ due to https://github.com/llvm/llvm-project/issues/127221 + std::mutex videoFramePrefetchMutex; + std::condition_variable notifyVideoFramePrefetchPending; + std::condition_variable notifyVideoFramePrefetchComplete; + std::condition_variable notifyVideoFramePrefetchThreadStarted; + std::condition_variable notifyVideoFramePrefetchThreadShutdownRequested; + std::condition_variable notifyVideoFramePrefetchThreadShutdownComplete; +#endif const unsigned char* videoFramePrefetchTarget; std::vector videoFramePrefetchBuffer; } ld; diff --git a/ares/md/mcd/megald.cpp b/ares/md/mcd/megald.cpp index 0025933cc..5a0227f48 100644 --- a/ares/md/mcd/megald.cpp +++ b/ares/md/mcd/megald.cpp @@ -149,16 +149,37 @@ auto MCD::LD::load(string location) -> void { videoFramePrefetchThreadShutdownComplete.clear(); std::thread workerThread(std::bind(std::mem_fn(&MCD::LD::videoFramePrefetchThread), this)); workerThread.detach(); +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (!videoFramePrefetchThreadStarted.test()) { + notifyVideoFramePrefetchThreadStarted.wait(lock); + } + } +#else videoFramePrefetchThreadStarted.wait(false); +#endif } auto MCD::LD::unload() -> void { // Request the prefetch background thread to terminate, and wait for it to complete. if (videoFramePrefetchThreadStarted.test()) { +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadShutdownRequested.test_and_set(); + videoFramePrefetchPending.test_and_set(); + notifyVideoFramePrefetchPending.notify_all(); + while (!videoFramePrefetchThreadShutdownComplete.test()) { + notifyVideoFramePrefetchThreadShutdownComplete.wait(lock); + } + } +#else videoFramePrefetchThreadShutdownRequested.test_and_set(); videoFramePrefetchPending.test_and_set(); videoFramePrefetchPending.notify_all(); videoFramePrefetchThreadShutdownComplete.wait(false); +#endif } // Close the mmi file @@ -2773,11 +2794,26 @@ auto MCD::LD::loadCurrentVideoFrameIntoBuffer() -> void { // when it starts the new one, as it is our responsibility to clear the prefetch complete state. This means if we // don't wait for the original prefetch to complete here, it would trigger a race condition for the load of the // following frame. +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (videoFramePrefetchPending.test()) { + notifyVideoFramePrefetchPending.wait(lock); + } + if (videoFramePrefetchTarget != nullptr) { + while (!videoFramePrefetchComplete.test()) { + notifyVideoFramePrefetchComplete.wait(lock); + } + videoFramePrefetchComplete.clear(); + } + } +#else videoFramePrefetchPending.wait(true); if (videoFramePrefetchTarget != nullptr) { videoFramePrefetchComplete.wait(false); videoFramePrefetchComplete.clear(); } +#endif // If the prefetch operation is for the correct frame, we've just waited for it to complete above, so we now swap the // prefetch buffer with the build frame buffer. Note that this will exchange memory buffer pointers and not copy the @@ -2841,19 +2877,44 @@ auto MCD::LD::loadCurrentVideoFrameIntoBuffer() -> void { return; } videoFramePrefetchTarget = videoFrameCompressed; +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchPending.test_and_set(); + notifyVideoFramePrefetchPending.notify_all(); + } +#else videoFramePrefetchPending.test_and_set(); videoFramePrefetchPending.notify_all(); +#endif } auto MCD::LD::videoFramePrefetchThread() -> void { // Trigger a notification that this worker thread has started +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadStarted.test_and_set(); + notifyVideoFramePrefetchThreadStarted.notify_all(); + } +#else videoFramePrefetchThreadStarted.test_and_set(); videoFramePrefetchThreadStarted.notify_all(); +#endif // Perform prefetch requests as they arrive, and terminate the thread when requested. while (!videoFramePrefetchThreadShutdownRequested.test()) { // Wait for a prefetch request to arrive +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (!videoFramePrefetchPending.test()) { + notifyVideoFramePrefetchPending.wait(lock); + } + } +#else videoFramePrefetchPending.wait(false); +#endif // If this thread has been requested to terminate, break out of the prefetch loop. if (videoFramePrefetchThreadShutdownRequested.test()) { @@ -2861,8 +2922,16 @@ auto MCD::LD::videoFramePrefetchThread() -> void { } // Trigger a notification that a prefetch request is no longer pending +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchPending.clear(); + notifyVideoFramePrefetchPending.notify_all(); + } +#else videoFramePrefetchPending.clear(); videoFramePrefetchPending.notify_all(); +#endif // Allocate memory for the prefetch frame buffer if it's currently empty if (videoFramePrefetchBuffer.empty()) { @@ -2874,13 +2943,29 @@ auto MCD::LD::videoFramePrefetchThread() -> void { qoi2_decode_data(videoFramePrefetchTarget + QON_FRAME_SIZE_SIZE, frameSizeCompressed, &video.videoFrameHeader, nullptr, videoFramePrefetchBuffer.data(), 3); // Trigger a notification that the prefetch operation is complete +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchComplete.test_and_set(); + notifyVideoFramePrefetchComplete.notify_all(); + } +#else videoFramePrefetchComplete.test_and_set(); videoFramePrefetchComplete.notify_all(); +#endif } // Trigger a notification that this worker thread has shut down +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadShutdownComplete.test_and_set(); + notifyVideoFramePrefetchThreadShutdownComplete.notify_all(); + } +#else videoFramePrefetchThreadShutdownComplete.test_and_set(); videoFramePrefetchThreadShutdownComplete.notify_all(); +#endif } auto MCD::LD::decodeBiphaseCodeFromScanline(int lineNo) -> u32 { diff --git a/ares/md/md.hpp b/ares/md/md.hpp index d3ea4b692..b47be9a4f 100644 --- a/ares/md/md.hpp +++ b/ares/md/md.hpp @@ -1,6 +1,10 @@ #pragma once //started: 2016-07-08 +#if !defined(USE_ATOMIC_FLAG_NOTIFY_FALLBACK) && !defined(_MSC_VER) && defined(_WIN32) +#define USE_ATOMIC_FLAG_NOTIFY_FALLBACK +#endif + #include #include #include @@ -8,6 +12,10 @@ #include #include #include +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK +#include +#include +#endif #include #include diff --git a/ares/pce/pcd/ldrom2.cpp b/ares/pce/pcd/ldrom2.cpp index 0a3a269ce..5dac6c03a 100644 --- a/ares/pce/pcd/ldrom2.cpp +++ b/ares/pce/pcd/ldrom2.cpp @@ -149,16 +149,37 @@ auto PCD::LD::load(string location) -> void { videoFramePrefetchThreadShutdownComplete.clear(); std::thread workerThread(std::bind(std::mem_fn(&PCD::LD::videoFramePrefetchThread), this)); workerThread.detach(); +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (!videoFramePrefetchThreadStarted.test()) { + notifyVideoFramePrefetchThreadStarted.wait(lock); + } + } +#else videoFramePrefetchThreadStarted.wait(false); +#endif } auto PCD::LD::unload() -> void { // Request the prefetch background thread to terminate, and wait for it to complete. if (videoFramePrefetchThreadStarted.test()) { +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadShutdownRequested.test_and_set(); + videoFramePrefetchPending.test_and_set(); + notifyVideoFramePrefetchPending.notify_all(); + while (!videoFramePrefetchThreadShutdownComplete.test()) { + notifyVideoFramePrefetchThreadShutdownComplete.wait(lock); + } + } +#else videoFramePrefetchThreadShutdownRequested.test_and_set(); videoFramePrefetchPending.test_and_set(); videoFramePrefetchPending.notify_all(); videoFramePrefetchThreadShutdownComplete.wait(false); +#endif } // Close the mmi file @@ -2777,11 +2798,26 @@ auto PCD::LD::loadCurrentVideoFrameIntoBuffer() -> void { // when it starts the new one, as it is our responsibility to clear the prefetch complete state. This means if we // don't wait for the original prefetch to complete here, it would trigger a race condition for the load of the // following frame. +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (videoFramePrefetchPending.test()) { + notifyVideoFramePrefetchPending.wait(lock); + } + if (videoFramePrefetchTarget != nullptr) { + while (!videoFramePrefetchComplete.test()) { + notifyVideoFramePrefetchComplete.wait(lock); + } + videoFramePrefetchComplete.clear(); + } + } +#else videoFramePrefetchPending.wait(true); if (videoFramePrefetchTarget != nullptr) { videoFramePrefetchComplete.wait(false); videoFramePrefetchComplete.clear(); } +#endif // If the prefetch operation is for the correct frame, we've just waited for it to complete above, so we now swap the // prefetch buffer with the build frame buffer. Note that this will exchange memory buffer pointers and not copy the @@ -2845,19 +2881,44 @@ auto PCD::LD::loadCurrentVideoFrameIntoBuffer() -> void { return; } videoFramePrefetchTarget = videoFrameCompressed; +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchPending.test_and_set(); + notifyVideoFramePrefetchPending.notify_all(); + } +#else videoFramePrefetchPending.test_and_set(); videoFramePrefetchPending.notify_all(); +#endif } auto PCD::LD::videoFramePrefetchThread() -> void { // Trigger a notification that this worker thread has started +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadStarted.test_and_set(); + notifyVideoFramePrefetchThreadStarted.notify_all(); + } +#else videoFramePrefetchThreadStarted.test_and_set(); videoFramePrefetchThreadStarted.notify_all(); +#endif // Perform prefetch requests as they arrive, and terminate the thread when requested. while (!videoFramePrefetchThreadShutdownRequested.test()) { // Wait for a prefetch request to arrive +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + while (!videoFramePrefetchPending.test()) { + notifyVideoFramePrefetchPending.wait(lock); + } + } +#else videoFramePrefetchPending.wait(false); +#endif // If this thread has been requested to terminate, break out of the prefetch loop. if (videoFramePrefetchThreadShutdownRequested.test()) { @@ -2865,8 +2926,16 @@ auto PCD::LD::videoFramePrefetchThread() -> void { } // Trigger a notification that a prefetch request is no longer pending +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchPending.clear(); + notifyVideoFramePrefetchPending.notify_all(); + } +#else videoFramePrefetchPending.clear(); videoFramePrefetchPending.notify_all(); +#endif // Allocate memory for the prefetch frame buffer if it's currently empty if (videoFramePrefetchBuffer.empty()) { @@ -2878,13 +2947,29 @@ auto PCD::LD::videoFramePrefetchThread() -> void { qoi2_decode_data(videoFramePrefetchTarget + QON_FRAME_SIZE_SIZE, frameSizeCompressed, &video.videoFrameHeader, nullptr, videoFramePrefetchBuffer.data(), 3); // Trigger a notification that the prefetch operation is complete +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchComplete.test_and_set(); + notifyVideoFramePrefetchComplete.notify_all(); + } +#else videoFramePrefetchComplete.test_and_set(); videoFramePrefetchComplete.notify_all(); +#endif } // Trigger a notification that this worker thread has shut down +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + { + std::unique_lock lock(videoFramePrefetchMutex); + videoFramePrefetchThreadShutdownComplete.test_and_set(); + notifyVideoFramePrefetchThreadShutdownComplete.notify_all(); + } +#else videoFramePrefetchThreadShutdownComplete.test_and_set(); videoFramePrefetchThreadShutdownComplete.notify_all(); +#endif } auto PCD::LD::decodeBiphaseCodeFromScanline(int lineNo) -> u32 { diff --git a/ares/pce/pcd/pcd.hpp b/ares/pce/pcd/pcd.hpp index c51ebebf8..93819ccb1 100644 --- a/ares/pce/pcd/pcd.hpp +++ b/ares/pce/pcd/pcd.hpp @@ -482,6 +482,15 @@ struct PCD : Thread { std::atomic_flag videoFramePrefetchThreadStarted; std::atomic_flag videoFramePrefetchThreadShutdownRequested; std::atomic_flag videoFramePrefetchThreadShutdownComplete; +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK + // Workaround for bad performance on Windows targets under MSYS2 with libc++ due to https://github.com/llvm/llvm-project/issues/127221 + std::mutex videoFramePrefetchMutex; + std::condition_variable notifyVideoFramePrefetchPending; + std::condition_variable notifyVideoFramePrefetchComplete; + std::condition_variable notifyVideoFramePrefetchThreadStarted; + std::condition_variable notifyVideoFramePrefetchThreadShutdownRequested; + std::condition_variable notifyVideoFramePrefetchThreadShutdownComplete; +#endif const unsigned char* videoFramePrefetchTarget; std::vector videoFramePrefetchBuffer; } ld; diff --git a/ares/pce/pce.hpp b/ares/pce/pce.hpp index 76ec6c49c..42f2b13d1 100644 --- a/ares/pce/pce.hpp +++ b/ares/pce/pce.hpp @@ -1,12 +1,20 @@ #pragma once //started: 2017-01-11 +#if !defined(USE_ATOMIC_FLAG_NOTIFY_FALLBACK) && !defined(_MSC_VER) && defined(_WIN32) +#define USE_ATOMIC_FLAG_NOTIFY_FALLBACK +#endif + #include #include #include #include #include #include +#ifdef USE_ATOMIC_FLAG_NOTIFY_FALLBACK +#include +#include +#endif #include #include