From c962d88c8c82edfc6ecbcc00fc19ea1a7283634a Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Thu, 18 Apr 2024 13:20:01 +0200
Subject: [PATCH] do not reuse stack addresses; make reuse rate configurable

---
 README.md                         |  5 +++++
 src/alloc_addresses/mod.rs        | 14 +++++++++-----
 src/alloc_addresses/reuse_pool.rs | 22 ++++++++++++----------
 src/bin/miri.rs                   | 14 ++++++++++++++
 src/eval.rs                       |  3 +++
 src/machine.rs                    |  4 ++--
 6 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 948f1ee6c6..f01ce52f05 100644
--- a/README.md
+++ b/README.md
@@ -295,6 +295,11 @@ up the sysroot. If you are using `miri` (the Miri driver) directly, see the
 Miri adds its own set of `-Z` flags, which are usually set via the `MIRIFLAGS`
 environment variable. We first document the most relevant and most commonly used flags:
 
+* `-Zmiri-address-reuse-rate=<rate>` changes the probability that a freed *non-stack* allocation
+  will be added to the pool for address reuse, and the probability that a new *non-stack* allocation
+  will be taken from the pool. Stack allocations never get added to or taken from the pool. The
+  default is `0.5`. Note that a very high reuse rate can mask concurrency bugs, as address
+  reuse induces synchronization between threads.
 * `-Zmiri-compare-exchange-weak-failure-rate=<rate>` changes the failure rate of
   `compare_exchange_weak` operations. The default is `0.8` (so 4 out of 5 weak ops will fail).
   You can change it to any value between `0.0` and `1.0`, where `1.0` means it
diff --git a/src/alloc_addresses/mod.rs b/src/alloc_addresses/mod.rs
index 50142d6b5a..247f829297 100644
--- a/src/alloc_addresses/mod.rs
+++ b/src/alloc_addresses/mod.rs
@@ -78,7 +78,7 @@ impl GlobalStateInner {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
-            reuse: ReusePool::new(),
+            reuse: ReusePool::new(config.address_reuse_rate),
             exposed: FxHashSet::default(),
             next_base_addr: stack_addr,
             provenance_mode: config.provenance_mode,
@@ -142,7 +142,11 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         }
     }
 
-    fn addr_from_alloc_id(&self, alloc_id: AllocId, _kind: MemoryKind) -> InterpResult<'tcx, u64> {
+    fn addr_from_alloc_id(
+        &self,
+        alloc_id: AllocId,
+        memory_kind: MemoryKind,
+    ) -> InterpResult<'tcx, u64> {
         let ecx = self.eval_context_ref();
         let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
         let global_state = &mut *global_state;
@@ -161,7 +165,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
         // This allocation does not have a base address yet, pick or reuse one.
         let base_addr = if let Some((reuse_addr, clock)) =
-            global_state.reuse.take_addr(&mut *rng, size, align)
+            global_state.reuse.take_addr(&mut *rng, size, align, memory_kind)
         {
             if let Some(data_race) = &ecx.machine.data_race {
                 data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
@@ -334,7 +338,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 }
 
 impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
-    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align) {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
         let global_state = self.alloc_addresses.get_mut();
         let rng = self.rng.get_mut();
 
@@ -359,7 +363,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
             // `alloc_id_from_addr` any more.
            global_state.exposed.remove(&dead_id);
             // Also remember this address for future reuse.
-            global_state.reuse.add_addr(rng, addr, size, align, || {
+            global_state.reuse.add_addr(rng, addr, size, align, kind, || {
                 let mut clock = concurrency::VClock::default();
                 if let Some(data_race) = &self.data_race {
                     data_race.validate_lock_release(
diff --git a/src/alloc_addresses/reuse_pool.rs b/src/alloc_addresses/reuse_pool.rs
index 9af4bdac4c..3b2f0269eb 100644
--- a/src/alloc_addresses/reuse_pool.rs
+++ b/src/alloc_addresses/reuse_pool.rs
@@ -4,20 +4,17 @@ use rand::Rng;
 
 use rustc_target::abi::{Align, Size};
 
-use crate::concurrency::VClock;
+use crate::{concurrency::VClock, MemoryKind};
 
 const MAX_POOL_SIZE: usize = 64;
 
-// Just use fair coins, until we have evidence that other numbers are better.
-const ADDR_REMEMBER_CHANCE: f64 = 0.5;
-const ADDR_TAKE_CHANCE: f64 = 0.5;
-
 /// The pool strikes a balance between exploring more possible executions and making it more likely
 /// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
 /// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
 /// structure. Therefore we only reuse allocations when size and alignment match exactly.
 #[derive(Debug)]
 pub struct ReusePool {
+    address_reuse_rate: f64,
     /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
     /// allocations as address-size pairs, the list must be sorted by the size.
     ///
@@ -30,8 +27,8 @@ pub struct ReusePool {
 }
 
 impl ReusePool {
-    pub fn new() -> Self {
-        ReusePool { pool: vec![] }
+    pub fn new(address_reuse_rate: f64) -> Self {
+        ReusePool { address_reuse_rate, pool: vec![] }
     }
 
     fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, VClock)> {
@@ -48,10 +45,14 @@ impl ReusePool {
         addr: u64,
         size: Size,
         align: Align,
+        kind: MemoryKind,
         clock: impl FnOnce() -> VClock,
     ) {
         // Let's see if we even want to remember this address.
-        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+        // We don't remember stack addresses: there's a lot of them (so the perf impact is big),
+        // and we only want to reuse stack slots within the same thread or else we'll add a lot of
+        // undesired synchronization.
+        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
             return;
         }
         // Determine the pool to add this to, and where in the pool to put it.
@@ -73,9 +74,10 @@ impl ReusePool {
         rng: &mut impl Rng,
         size: Size,
         align: Align,
+        kind: MemoryKind,
     ) -> Option<(u64, VClock)> {
-        // Determine whether we'll even attempt a reuse.
-        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+        // Determine whether we'll even attempt a reuse. As above, we don't do reuse for stack addresses.
+        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
             return None;
         }
         // Determine the pool to take this from.
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
index 3f7a965e9d..263a78257f 100644
--- a/src/bin/miri.rs
+++ b/src/bin/miri.rs
@@ -542,6 +542,20 @@ fn main() {
             miri_config.tracked_alloc_ids.extend(ids);
         } else if arg == "-Zmiri-track-alloc-accesses" {
             miri_config.track_alloc_accesses = true;
+        } else if let Some(param) = arg.strip_prefix("-Zmiri-address-reuse-rate=") {
+            let rate = match param.parse::<f64>() {
+                Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
+                Ok(_) =>
+                    show_error!(
+                        "-Zmiri-address-reuse-rate must be between `0.0` and `1.0`"
+                    ),
+                Err(err) =>
+                    show_error!(
+                        "-Zmiri-address-reuse-rate requires a `f64` between `0.0` and `1.0`: {}",
+                        err
+                    ),
+            };
+            miri_config.address_reuse_rate = rate;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-compare-exchange-weak-failure-rate=") {
             let rate = match param.parse::<f64>() {
                 Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
diff --git a/src/eval.rs b/src/eval.rs
index df0ede1e1b..457fe740d6 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -150,6 +150,8 @@ pub struct MiriConfig {
     pub page_size: Option<u64>,
     /// Whether to collect a backtrace when each allocation is created, just in case it leaks.
     pub collect_leak_backtraces: bool,
+    /// Probability for address reuse.
+    pub address_reuse_rate: f64,
 }
 
 impl Default for MiriConfig {
@@ -186,6 +188,7 @@ impl Default for MiriConfig {
             num_cpus: 1,
             page_size: None,
             collect_leak_backtraces: true,
+            address_reuse_rate: 0.5,
         }
     }
 }
diff --git a/src/machine.rs b/src/machine.rs
index d7c762fe1a..eec162a87e 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1282,7 +1282,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
         size: Size,
         align: Align,
-        _kind: MemoryKind,
+        kind: MemoryKind,
     ) -> InterpResult<'tcx> {
         if machine.tracked_alloc_ids.contains(&alloc_id) {
             machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
@@ -1303,7 +1303,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.free_alloc_id(alloc_id, size, align);
+        machine.free_alloc_id(alloc_id, size, align, kind);
         Ok(())
     }
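---

Editor's note, not part of the patch: the following is a minimal, self-contained Rust sketch
of the reuse policy the diff implements, for readers skimming the changes. One configurable
rate drives both the remember-on-free and the take-on-alloc coin flips, and stack allocations
bypass the pool entirely. This is illustrative code, not code from the Miri tree: the
two-variant MemoryKind, the plain u64 size/align fields, and the u64 pool entries are
simplifications, and the vector-clock bookkeeping and MAX_POOL_SIZE cap that the real
ReusePool performs are omitted. It assumes the rand crate (0.8 API).

// Simplified stand-in for Miri's MemoryKind: all we need here is "stack or not".
use rand::Rng;

#[derive(Clone, Copy, PartialEq)]
enum MemoryKind {
    Stack,
    Heap,
}

struct ReusePool {
    /// Probability for both remembering a freed address and reusing one.
    address_reuse_rate: f64,
    /// (address, size, align) triples; the real pool also stores a vector clock
    /// per entry so that reuse induces synchronization between threads.
    pool: Vec<(u64, u64, u64)>,
}

impl ReusePool {
    fn new(address_reuse_rate: f64) -> Self {
        ReusePool { address_reuse_rate, pool: Vec::new() }
    }

    /// On deallocation: maybe remember this address for later reuse.
    fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: u64, align: u64, kind: MemoryKind) {
        // Stack addresses are never pooled; otherwise flip the configured coin.
        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
            return;
        }
        self.pool.push((addr, size, align));
    }

    /// On allocation: maybe hand out a previously freed address with the same layout.
    fn take_addr(&mut self, rng: &mut impl Rng, size: u64, align: u64, kind: MemoryKind) -> Option<u64> {
        // As above, stack allocations never take from the pool.
        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
            return None;
        }
        // Only exact size/align matches are candidates, mirroring the pool's
        // ABA-hunting heuristic of reusing addresses for identical layouts.
        let idx = self.pool.iter().position(|&(_, s, a)| s == size && a == align)?;
        Some(self.pool.swap_remove(idx).0)
    }
}

fn main() {
    let mut rng = rand::thread_rng();
    let mut pool = ReusePool::new(0.5); // the new default rate
    pool.add_addr(&mut rng, 0x1000, 16, 8, MemoryKind::Heap);
    pool.add_addr(&mut rng, 0x2000, 16, 8, MemoryKind::Stack); // never remembered
    if let Some(addr) = pool.take_addr(&mut rng, 16, 8, MemoryKind::Heap) {
        println!("reusing address {addr:#x}");
    }
}

With the patch applied, the rate can be tuned per run via the documented flag, e.g.
MIRIFLAGS="-Zmiri-address-reuse-rate=0.0" cargo miri test to disable address reuse entirely.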