From 2880e438549b9a4df7ce370354e765cd08bae6bf Mon Sep 17 00:00:00 2001
From: Julien Portalier
Date: Mon, 6 May 2024 12:16:05 +0200
Subject: [PATCH] Fix: no need for explicit memory barriers on ARM

The notes on code generation in the LLVM atomics documentation state
that weakly-ordered architectures must emit memory barriers, which
implies that every memory order except relaxed already generates the
necessary memory barriers when requested.

Compiling and disassembling programs for ARMv6 and ARMv7 (a CPU or CPU
feature must be specified to target these), we can indeed verify that
memory barriers are properly emitted by the LLVM code generation: for
example, the assembly contains `dmb ish` for ARMv7 and
`mcr 15, 0, r1, cr7, cr10, {5}` for ARMv6.

When compiling for the older ARMv4 or ARMv5, the atomics call the
`__sync_*` functions from `libgcc`, where memory ordering is a noop
(not supported).

This patch thus removes the explicit memory barriers, as they duplicate
what the LLVM backends are already doing.
---
 src/atomic.cr            | 21 +++++----------------
 src/crystal/rw_lock.cr   | 13 -------------
 src/crystal/spin_lock.cr |  6 ------
 src/mutex.cr             |  9 ---------
 4 files changed, 5 insertions(+), 44 deletions(-)

diff --git a/src/atomic.cr b/src/atomic.cr
index db6cd7e88b84..c5d14513081b 100644
--- a/src/atomic.cr
+++ b/src/atomic.cr
@@ -20,6 +20,10 @@ struct Atomic(T)
   # (e.g. locks) you may try the acquire/release semantics that may be faster on
   # some architectures (e.g. X86) but remember that an acquire must be paired
   # with a release for the ordering to be guaranteed.
+  #
+  # The code generation always enforces the selected memory order, even on
+  # weak CPU architectures (e.g. ARM32), with the exception of the Relaxed
+  # memory order where only the operation itself is atomic.
   enum Ordering
     Relaxed = LLVM::AtomicOrdering::Monotonic
     Acquire = LLVM::AtomicOrdering::Acquire
@@ -29,14 +33,6 @@ struct Atomic(T)
   end
 
   # Adds an explicit memory barrier with the specified memory order guarantee.
-  #
-  # Atomics on weakly-ordered CPUs (e.g. ARM32) may not guarantee memory order
-  # of other memory accesses, and an explicit memory barrier is thus required.
-  #
-  # Notes:
-  # - X86 is strongly-ordered and trying to add a fence should be a NOOP;
-  # - AArch64 guarantees memory order and doesn't need explicit fences in
-  #   addition to the atomics (but may need barriers in other cases).
   macro fence(ordering = :sequentially_consistent)
     ::Atomic::Ops.fence({{ordering}}, false)
   end
@@ -470,18 +466,11 @@ struct Atomic::Flag
   # Atomically tries to set the flag. Only succeeds and returns `true` if the
   # flag wasn't previously set; returns `false` otherwise.
   def test_and_set : Bool
-    ret = @value.swap(true, :sequentially_consistent) == false
-    {% if flag?(:arm) %}
-      Atomic::Ops.fence(:sequentially_consistent, false) if ret
-    {% end %}
-    ret
+    @value.swap(true, :sequentially_consistent) == false
   end
 
   # Atomically clears the flag.
   def clear : Nil
-    {% if flag?(:arm) %}
-      Atomic::Ops.fence(:sequentially_consistent, false)
-    {% end %}
     @value.set(false, :sequentially_consistent)
   end
 end
diff --git a/src/crystal/rw_lock.cr b/src/crystal/rw_lock.cr
index c83074180e25..77b7331b76fe 100644
--- a/src/crystal/rw_lock.cr
+++ b/src/crystal/rw_lock.cr
@@ -15,9 +15,6 @@ struct Crystal::RWLock
       @readers.add(1, :acquire)
 
       if @writer.get(:acquire) == UNLOCKED
-        {% if flag?(:arm) %}
-          Atomic.fence(:acquire)
-        {% end %}
         return
       end
 
@@ -26,9 +23,6 @@
   end
 
   def read_unlock : Nil
-    {% if flag?(:arm) %}
-      Atomic.fence(:release)
-    {% end %}
     @readers.sub(1, :release)
   end
 
@@ -40,16 +34,9 @@
     while @readers.get(:acquire) != 0
       Intrinsics.pause
     end
-
-    {% if flag?(:arm) %}
-      Atomic.fence(:acquire)
-    {% end %}
   end
 
   def write_unlock : Nil
-    {% if flag?(:arm) %}
-      Atomic.fence(:release)
-    {% end %}
     @writer.set(UNLOCKED, :release)
   end
 end
diff --git a/src/crystal/spin_lock.cr b/src/crystal/spin_lock.cr
index 0792ce652159..4255fcae7bbd 100644
--- a/src/crystal/spin_lock.cr
+++ b/src/crystal/spin_lock.cr
@@ -14,17 +14,11 @@ class Crystal::SpinLock
           Intrinsics.pause
         end
       end
-      {% if flag?(:arm) %}
-        Atomic.fence(:acquire)
-      {% end %}
     {% end %}
   end
 
   def unlock
     {% if flag?(:preview_mt) %}
-      {% if flag?(:arm) %}
-        Atomic.fence(:release)
-      {% end %}
       @m.set(UNLOCKED, :release)
     {% end %}
   end
diff --git a/src/mutex.cr b/src/mutex.cr
index 38d34c14b070..e72ede2c8261 100644
--- a/src/mutex.cr
+++ b/src/mutex.cr
@@ -38,9 +38,6 @@ class Mutex
   @[AlwaysInline]
   def lock : Nil
     if @state.swap(LOCKED, :acquire) == UNLOCKED
-      {% if flag?(:arm) %}
-        Atomic.fence(:acquire)
-      {% end %}
       @mutex_fiber = Fiber.current unless @protection.unchecked?
       return
     end
@@ -67,9 +64,6 @@
       if @state.get(:relaxed) == UNLOCKED
         if @state.swap(LOCKED, :acquire) == UNLOCKED
-          {% if flag?(:arm) %}
-            Atomic.fence(:acquire)
-          {% end %}
           @queue_count.sub(1)
 
           @mutex_fiber = Fiber.current unless @protection.unchecked?
 
@@ -94,9 +88,6 @@
         return false if i == 0
       end
     end
-    {% if flag?(:arm) %}
-      Atomic.fence(:acquire)
-    {% end %}
     true
   end
 
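
Note: as an illustration of why the removed fences were redundant, here
is a minimal spin lock sketch in Crystal. The `ExampleLock` name is
hypothetical; it is modeled on the `src/crystal/spin_lock.cr` code
touched by this patch, and uses only the `Atomic(T)#swap`/`#set` calls
(with an ordering argument) and `Intrinsics.pause` that appear in the
diff above. It relies solely on the acquire/release orderings: per the
commit message, LLVM already emits the barriers for these orderings on
weakly-ordered targets (e.g. `dmb ish` on ARMv7), so no `Atomic.fence`
is needed.

    class ExampleLock
      UNLOCKED = 0
      LOCKED   = 1

      @m = Atomic(Int32).new(UNLOCKED)

      def lock : Nil
        # The :acquire ordering is enforced by the generated code, even
        # on ARM32, so the explicit Atomic.fence(:acquire) that used to
        # follow a successful swap is redundant.
        while @m.swap(LOCKED, :acquire) == LOCKED
          Intrinsics.pause
        end
      end

      def unlock : Nil
        # Likewise, :release already orders the preceding writes; no
        # Atomic.fence(:release) is needed before the store.
        @m.set(UNLOCKED, :release)
      end
    end

Only the relaxed ordering skips the barrier, which is why the patch
keeps every call site on an explicit :acquire, :release, or
:sequentially_consistent ordering rather than downgrading any of them.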