smol-rs · taiki-e · Jul 17, 2022 · Jul 17, 2022 · RalfJung · Jul 26, 2022
diff --git a/src/bounded.rs b/src/bounded.rs
@@ -129,8 +129,7 @@ impl<T> Bounded<T> {
                     }
                 }
             } else if stamp.wrapping_add(self.one_lap) == tail + 1 {
-                crate::full_fence();
-                let head = self.head.load(Ordering::Relaxed);
+                let head = crate::full_fence_for_load(|| self.head.load(Ordering::Relaxed));
 
                 // If the head lags one lap behind the tail as well...
                 if head.wrapping_add(self.one_lap) == tail {
@@ -191,8 +190,7 @@ impl<T> Bounded<T> {
                     }
                 }
             } else if stamp == head {
-                crate::full_fence();
-                let tail = self.tail.load(Ordering::Relaxed);
+                let tail = crate::full_fence_for_load(|| self.tail.load(Ordering::Relaxed));
 
                 // If the tail equals the head, that means the queue is empty.
                 if (tail & !self.mark_bit) == head {

diff --git a/src/lib.rs b/src/lib.rs
@@ -445,8 +445,11 @@ impl<T> fmt::Display for PushError<T> {
 
 /// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
 #[inline]
-fn full_fence() {
-    if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
+fn full_fence_for_load<T>(load_op: impl FnOnce() -> T) -> T {
+    if cfg!(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        not(miri)
+    )) {
         // HACK(stjepang): On x86 architectures there are two different ways of executing
         // a `SeqCst` fence.
         //
@@ -461,7 +464,11 @@ fn full_fence() {
         // x86 platforms is going to optimize this away.
         let a = AtomicUsize::new(0);
         let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst);
+        // On x86, a locked instruction acts as a full barrier, so `lock cmpxchg` (the fence) followed by `mov` (the load) is fine. See also https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.
+        load_op()
     } else {
+        let res = load_op();
         atomic::fence(Ordering::SeqCst);
+        res
     }
 }
diff --git a/src/unbounded.rs b/src/unbounded.rs
@@ -237,8 +237,7 @@ impl<T> Unbounded<T> {
             let mut new_head = head + (1 << SHIFT);
 
             if new_head & MARK_BIT == 0 {
-                crate::full_fence();
-                let tail = self.tail.index.load(Ordering::Relaxed);
+                let tail = crate::full_fence_for_load(|| self.tail.index.load(Ordering::Relaxed));
 
                 // If the tail equals the head, that means the queue is empty.
                 if head >> SHIFT == tail >> SHIFT {