From 2049ee7571c1ac8ec142dd31811a4c94a483d194 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 16 Apr 2024 15:23:00 +0200
Subject: [PATCH] when an address gets reused, establish a happens-before link
 in the data race model

---
 src/alloc_addresses/mod.rs          | 42 ++++++++-----
 src/alloc_addresses/reuse_pool.rs   | 39 ++++++++----
 src/concurrency/mod.rs              |  2 +
 src/concurrency/thread.rs           |  6 ++
 src/helpers.rs                      |  6 +-
 src/machine.rs                      |  7 +--
 .../address_reuse_happens_before.rs | 59 +++++++++++++++++++
 7 files changed, 125 insertions(+), 36 deletions(-)
 create mode 100644 tests/pass/concurrency/address_reuse_happens_before.rs

diff --git a/src/alloc_addresses/mod.rs b/src/alloc_addresses/mod.rs
index fec39ec2b8..8f86a19b44 100644
--- a/src/alloc_addresses/mod.rs
+++ b/src/alloc_addresses/mod.rs
@@ -14,7 +14,8 @@ use rustc_span::Span;
 use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
-use reuse_pool::ReusePool;
+
+use self::reuse_pool::ReusePool;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ProvenanceMode {
@@ -159,9 +160,12 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 assert!(!matches!(kind, AllocKind::Dead));
 
                 // This allocation does not have a base address yet, pick or reuse one.
-                let base_addr = if let Some(reuse_addr) =
+                let base_addr = if let Some((reuse_addr, clock)) =
                     global_state.reuse.take_addr(&mut *rng, size, align)
                 {
+                    if let Some(data_race) = &ecx.machine.data_race {
+                        data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
+                    }
                     reuse_addr
                 } else {
                     // We have to pick a fresh address.
@@ -328,14 +332,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     }
 }
 
-impl GlobalStateInner {
-    pub fn free_alloc_id(
-        &mut self,
-        rng: &mut impl Rng,
-        dead_id: AllocId,
-        size: Size,
-        align: Align,
-    ) {
+impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align) {
+        let global_state = self.alloc_addresses.get_mut();
+        let rng = self.rng.get_mut();
+
         // We can *not* remove this from `base_addr`, since the interpreter design requires that we
         // be able to retrieve an AllocId + offset for any memory access *before* we check if the
         // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -348,15 +349,26 @@ impl GlobalStateInner {
         // returns a dead allocation.
         // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
         // `int_to_ptr_map`.
-        let addr = *self.base_addr.get(&dead_id).unwrap();
-        let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
-        let removed = self.int_to_ptr_map.remove(pos);
+        let addr = *global_state.base_addr.get(&dead_id).unwrap();
+        let pos =
+            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
+        let removed = global_state.int_to_ptr_map.remove(pos);
         assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
         // We can also remove it from `exposed`, since this allocation can anyway not be returned by
         // `alloc_id_from_addr` any more.
-        self.exposed.remove(&dead_id);
+        global_state.exposed.remove(&dead_id);
         // Also remember this address for future reuse.
-        self.reuse.add_addr(rng, addr, size, align)
+        global_state.reuse.add_addr(rng, addr, size, align, || {
+            let mut clock = concurrency::VClock::default();
+            if let Some(data_race) = &self.data_race {
+                data_race.validate_lock_release(
+                    &mut clock,
+                    self.threads.get_active_thread_id(),
+                    self.threads.active_thread_ref().current_span(),
+                );
+            }
+            clock
+        })
     }
 }
 
diff --git a/src/alloc_addresses/reuse_pool.rs b/src/alloc_addresses/reuse_pool.rs
index 8374d0ec60..9af4bdac4c 100644
--- a/src/alloc_addresses/reuse_pool.rs
+++ b/src/alloc_addresses/reuse_pool.rs
@@ -4,6 +4,8 @@ use rand::Rng;
 
 use rustc_target::abi::{Align, Size};
 
+use crate::concurrency::VClock;
+
 const MAX_POOL_SIZE: usize = 64;
 
 // Just use fair coins, until we have evidence that other numbers are better.
@@ -21,7 +23,10 @@ pub struct ReusePool {
     ///
     /// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
     /// less than 64 different possible value, that bounds the overall size of the pool.
-    pool: Vec<Vec<(u64, Size)>>,
+    ///
+    /// We also store the clock from the thread that donated this pool element,
+    /// to ensure synchronization with the thread that picks up this address.
+    pool: Vec<Vec<(u64, Size, VClock)>>,
 }
 
 impl ReusePool {
@@ -29,7 +34,7 @@
         ReusePool { pool: vec![] }
     }
 
-    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, VClock)> {
         let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
         if self.pool.len() <= pool_idx {
             self.pool.resize(pool_idx + 1, Vec::new());
@@ -37,26 +42,38 @@ impl ReusePool {
         &mut self.pool[pool_idx]
     }
 
-    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+    pub fn add_addr(
+        &mut self,
+        rng: &mut impl Rng,
+        addr: u64,
+        size: Size,
+        align: Align,
+        clock: impl FnOnce() -> VClock,
+    ) {
         // Let's see if we even want to remember this address.
         if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
             return;
         }
         // Determine the pool to add this to, and where in the pool to put it.
         let subpool = self.subpool(align);
-        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let pos = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
         // Make sure the pool does not grow too big.
         if subpool.len() >= MAX_POOL_SIZE {
             // Pool full. Replace existing element, or last one if this would be even bigger.
             let clamped_pos = pos.min(subpool.len() - 1);
-            subpool[clamped_pos] = (addr, size);
+            subpool[clamped_pos] = (addr, size, clock());
             return;
         }
         // Add address to pool, at the right position.
-        subpool.insert(pos, (addr, size));
+        subpool.insert(pos, (addr, size, clock()));
     }
 
-    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
+    pub fn take_addr(
+        &mut self,
+        rng: &mut impl Rng,
+        size: Size,
+        align: Align,
+    ) -> Option<(u64, VClock)> {
         // Determine whether we'll even attempt a reuse.
         if !rng.gen_bool(ADDR_TAKE_CHANCE) {
             return None;
@@ -65,9 +82,9 @@ impl ReusePool {
         let subpool = self.subpool(align);
         // Let's see if we can find something of the right size. We want to find the full range of
         // such items, beginning with the first, so we can't use `binary_search_by_key`.
-        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let begin = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
         let mut end = begin;
-        while let Some((_addr, other_size)) = subpool.get(end) {
+        while let Some((_addr, other_size, _)) = subpool.get(end) {
             if *other_size != size {
                 break;
             }
@@ -80,8 +97,8 @@ impl ReusePool {
         // Pick a random element with the desired size.
         let idx = rng.gen_range(begin..end);
         // Remove it from the pool and return.
-        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        let (chosen_addr, chosen_size, clock) = subpool.remove(idx);
         debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
-        Some(chosen_addr)
+        Some((chosen_addr, clock))
     }
 }
diff --git a/src/concurrency/mod.rs b/src/concurrency/mod.rs
index 45903107f1..15e1a94d6d 100644
--- a/src/concurrency/mod.rs
+++ b/src/concurrency/mod.rs
@@ -6,3 +6,5 @@ pub mod init_once;
 pub mod thread;
 mod vector_clock;
 pub mod weak_memory;
+
+pub use vector_clock::VClock;
diff --git a/src/concurrency/thread.rs b/src/concurrency/thread.rs
index 06e2318833..00467fe07e 100644
--- a/src/concurrency/thread.rs
+++ b/src/concurrency/thread.rs
@@ -223,6 +223,12 @@ impl<'mir, 'tcx> Thread<'mir, 'tcx> {
         // empty stacks.
         self.top_user_relevant_frame.or_else(|| self.stack.len().checked_sub(1))
     }
+
+    pub fn current_span(&self) -> Span {
+        self.top_user_relevant_frame()
+            .map(|frame_idx| self.stack[frame_idx].current_span())
+            .unwrap_or(rustc_span::DUMMY_SP)
+    }
 }
 
 impl<'mir, 'tcx> std::fmt::Debug for Thread<'mir, 'tcx> {
diff --git a/src/helpers.rs b/src/helpers.rs
index e2c6769ccb..17ae2dd91a 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -1265,9 +1265,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
     /// This function is backed by a cache, and can be assumed to be very fast.
     /// It will work even when the stack is empty.
     pub fn current_span(&self) -> Span {
-        self.top_user_relevant_frame()
-            .map(|frame_idx| self.stack()[frame_idx].current_span())
-            .unwrap_or(rustc_span::DUMMY_SP)
+        self.threads.active_thread_ref().current_span()
     }
 
     /// Returns the span of the *caller* of the current operation, again
@@ -1279,7 +1277,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
         // We need to go down at least to the caller (len - 2), or however
         // far we have to go to find a frame in a local crate which is also not #[track_caller].
         let frame_idx = self.top_user_relevant_frame().unwrap();
-        let frame_idx = cmp::min(frame_idx, self.stack().len().checked_sub(2).unwrap());
+        let frame_idx = cmp::min(frame_idx, self.stack().len().saturating_sub(2));
         self.stack()[frame_idx].current_span()
     }
 
diff --git a/src/machine.rs b/src/machine.rs
index ff081328a7..51123484d4 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1300,12 +1300,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.alloc_addresses.get_mut().free_alloc_id(
-            machine.rng.get_mut(),
-            alloc_id,
-            size,
-            align,
-        );
+        machine.free_alloc_id(alloc_id, size, align);
         Ok(())
     }
 
diff --git a/tests/pass/concurrency/address_reuse_happens_before.rs b/tests/pass/concurrency/address_reuse_happens_before.rs
new file mode 100644
index 0000000000..13a00f5276
--- /dev/null
+++ b/tests/pass/concurrency/address_reuse_happens_before.rs
@@ -0,0 +1,59 @@
+//! Regression test for :
+//! When the address gets reused, there should be a happens-before relation.
+#![feature(strict_provenance)]
+#![feature(sync_unsafe_cell)]
+
+use std::cell::SyncUnsafeCell;
+use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
+use std::thread;
+
+static ADDR: AtomicUsize = AtomicUsize::new(0);
+static VAL: SyncUnsafeCell<i32> = SyncUnsafeCell::new(0);
+
+fn addr() -> usize {
+    let alloc = Box::new(42);
+    <*const i32>::addr(&*alloc)
+}
+
+fn thread1() {
+    unsafe {
+        VAL.get().write(42);
+    }
+    let alloc = addr();
+    ADDR.store(alloc, Relaxed);
+}
+
+fn thread2() -> bool {
+    // We try to get an allocation at the same address. If we fail too often, try again with a
+    // different allocation to ensure it is still in Miri's reuse cache.
+    for _ in 0..16 {
+        let alloc = addr();
+        let addr = ADDR.load(Relaxed);
+        if alloc == addr {
+            // If the new allocation is at the same address as the old one, there must be a
+            // happens-before relationship between them. Therefore, we can read VAL without racing
+            // and must observe the write above.
+            let val = unsafe { VAL.get().read() };
+            assert_eq!(val, 42);
+            return true;
+        }
+    }
+
+    false
+}
+
+fn main() {
+    let mut success = false;
+    while !success {
+        let t1 = thread::spawn(thread1);
+        let t2 = thread::spawn(thread2);
+        t1.join().unwrap();
+        success = t2.join().unwrap();
+
+        // Reset everything.
+        ADDR.store(0, Relaxed);
+        unsafe {
+            VAL.get().write(0);
+        }
+    }
+}
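
For readers who have not seen Miri's vector-clock machinery before, the standalone sketch below
illustrates the synchronization pattern this patch implements: the thread that frees an allocation
releases a clock into the reuse-pool entry (the closure passed to `add_addr`), and the thread that
later takes the same address acquires that clock (`validate_lock_acquire` on the clock returned by
`take_addr`), so everything the freeing thread did before the free happens-before the reuse. The
`VClock`, `PoolEntry`, `tick`, and `join` names and the 0x1000 address below are illustrative
stand-ins, not Miri's actual API (Miri's real `VClock` lives in `src/concurrency/vector_clock.rs`).

use std::collections::HashMap;

type ThreadId = usize;

/// Toy vector clock: one counter per thread.
#[derive(Clone, Default, Debug)]
struct VClock(HashMap<ThreadId, u64>);

impl VClock {
    /// Advance this thread's component (what a release does before publishing the clock).
    fn tick(&mut self, thread: ThreadId) {
        *self.0.entry(thread).or_insert(0) += 1;
    }
    /// Join another clock into this one (what an acquire does with the published clock).
    fn join(&mut self, other: &VClock) {
        for (&t, &c) in &other.0 {
            let e = self.0.entry(t).or_insert(0);
            *e = (*e).max(c);
        }
    }
}

/// Toy reuse-pool entry: an address plus the clock released by the freeing thread.
struct PoolEntry {
    addr: u64,
    clock: VClock,
}

fn main() {
    // Thread 1 frees an allocation at a (hypothetical) address and releases its clock
    // into the pool entry, like the closure passed to `add_addr` in this patch.
    let mut t1_clock = VClock::default();
    t1_clock.tick(1);
    let entry = PoolEntry { addr: 0x1000, clock: t1_clock.clone() };

    // Thread 2 later takes that address out of the pool and acquires (joins) the stored
    // clock, like the acquire on the clock returned by `take_addr`. Everything thread 1
    // did before the free now happens-before thread 2's reuse of the address.
    let mut t2_clock = VClock::default();
    t2_clock.tick(2);
    t2_clock.join(&entry.clock);

    println!("reused address {:#x}; thread 2 clock after acquire: {:?}", entry.addr, t2_clock);
}

This is also why the test above may read `VAL` non-atomically once it observes the reused address:
acquiring the donated clock orders thread1's write to `VAL` before thread2's read.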