From f9673ad8c83775430bd5e31f79554b56d034cfcf Mon Sep 17 00:00:00 2001
From: Milian Wolff
Date: Tue, 28 Jun 2022 10:50:37 +0200
Subject: [PATCH] WIP: Reducing cache coherency traffic under contention for
 spinlock

Use the code from [1] to implement the spinlock on top of
std::atomic<bool> instead of std::atomic_flag. While the former is not
guaranteed to be lock-free, it is on the majority of platforms. A
static assert is added to catch platforms where it is not lock-free -
there we could potentially fall back to the older implementation.

WIP because I don't have a good scientific benchmark for this yet. I
tested it in our real-world application, and it seems to have slightly
reduced the load on the spinlock, but not by a large margin...

See also: https://github.com/efficient/libcuckoo/issues/146

[1]: https://rigtorp.se/spinlock/
---
 libcuckoo/cuckoohash_map.hh | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/libcuckoo/cuckoohash_map.hh b/libcuckoo/cuckoohash_map.hh
index 4cfd2ac..e063704 100644
--- a/libcuckoo/cuckoohash_map.hh
+++ b/libcuckoo/cuckoohash_map.hh
@@ -803,13 +803,11 @@ private:
   LIBCUCKOO_SQUELCH_PADDING_WARNING
   class LIBCUCKOO_ALIGNAS(64) spinlock {
   public:
-    spinlock() : elem_counter_(0), is_migrated_(true) { lock_.clear(); }
+    spinlock() : elem_counter_(0), is_migrated_(true) {}
 
     spinlock(const spinlock &other) noexcept
         : elem_counter_(other.elem_counter()),
-          is_migrated_(other.is_migrated()) {
-      lock_.clear();
-    }
+          is_migrated_(other.is_migrated()) {}
 
     spinlock &operator=(const spinlock &other) noexcept {
       elem_counter() = other.elem_counter();
@@ -818,14 +816,27 @@ private:
     }
 
     void lock() noexcept {
-      while (lock_.test_and_set(std::memory_order_acq_rel))
-        ;
+      for (;;) {
+        // Optimistically assume the lock is free on the first try
+        if (!lock_.exchange(true, std::memory_order_acquire)) {
+          return;
+        }
+        // Wait for lock to be released without generating cache misses
+        while (lock_.load(std::memory_order_relaxed)) {
+          // Issue X86 PAUSE or ARM YIELD instruction to reduce contention
+          // between hyper-threads
+          __builtin_ia32_pause();
+        }
+      }
     }
 
-    void unlock() noexcept { lock_.clear(std::memory_order_release); }
+    void unlock() noexcept { lock_.store(false, std::memory_order_release); }
 
     bool try_lock() noexcept {
-      return !lock_.test_and_set(std::memory_order_acq_rel);
+      // First do a relaxed load to check if lock is free in order to prevent
+      // unnecessary cache misses if someone does while(!try_lock())
+      return !lock_.load(std::memory_order_relaxed) &&
+             !lock_.exchange(true, std::memory_order_acquire);
     }
 
     counter_type &elem_counter() noexcept { return elem_counter_; }
@@ -835,7 +846,7 @@ private:
     bool is_migrated() const noexcept { return is_migrated_; }
 
   private:
-    std::atomic_flag lock_;
+    std::atomic<bool> lock_ = {0};
     counter_type elem_counter_;
     bool is_migrated_;
   };
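
Note: the static assert mentioned above is not part of this diff yet.
A minimal sketch of what it could look like, next to the lock_ member,
assuming the C++11 ATOMIC_BOOL_LOCK_FREE macro from <atomic> (a value
of 2 means "always lock-free"); the exact wording is mine:

    // hypothetical, not in this patch: reject platforms where
    // std::atomic<bool> may not be lock-free, so that we could fall
    // back to the std::atomic_flag implementation there instead
    static_assert(ATOMIC_BOOL_LOCK_FREE == 2,
                  "spinlock requires a lock-free std::atomic<bool>");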
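
As for benchmarking: until there is something scientific, a throwaway
contention smoke test could look like the sketch below. It is
standalone, the file name and constants are arbitrary, and - like the
patch itself - it assumes an x86 GCC/Clang toolchain for
__builtin_ia32_pause:

    // spinlock_bench.cpp - a quick contention smoke test, not a
    // scientific benchmark.
    // Build with: g++ -O2 -std=c++11 -pthread spinlock_bench.cpp
    #include <atomic>
    #include <chrono>
    #include <cstdio>
    #include <thread>
    #include <vector>

    // Reduced copy of the patched spinlock (locking parts only).
    struct spinlock {
      std::atomic<bool> lock_{false};
      void lock() noexcept {
        for (;;) {
          if (!lock_.exchange(true, std::memory_order_acquire))
            return;
          while (lock_.load(std::memory_order_relaxed))
            __builtin_ia32_pause(); // x86 GCC/Clang only
        }
      }
      void unlock() noexcept { lock_.store(false, std::memory_order_release); }
    };

    int main() {
      spinlock lock;
      long counter = 0;
      const int num_threads = 4;       // arbitrary
      const long iterations = 1000000; // arbitrary
      std::vector<std::thread> threads;
      const auto start = std::chrono::steady_clock::now();
      for (int t = 0; t < num_threads; ++t) {
        threads.emplace_back([&] {
          for (long i = 0; i < iterations; ++i) {
            lock.lock();
            ++counter; // tiny critical section to maximize contention
            lock.unlock();
          }
        });
      }
      for (auto &th : threads)
        th.join();
      const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
          std::chrono::steady_clock::now() - start);
      std::printf("counter=%ld elapsed=%lld ms\n", counter,
                  static_cast<long long>(elapsed.count()));
      return 0;
    }

Comparing the wall-clock time of this loop before and after the patch
would at least show whether the relaxed spin-wait helps under heavy
contention on a given machine.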