From 9eb94e8d2011e87b1552fc57a7cb4bfa8489541f Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Mon, 29 Jan 2024 20:49:57 +0000
Subject: [PATCH 01/30] moving out sched_getaffinity interception from
 linux'shim, FreeBSD supporting it too.

---
 .../miri/src/shims/unix/foreign_items.rs      | 19 +++++++++++++++++++
 .../src/shims/unix/linux/foreign_items.rs     | 11 -----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 35036ce078d37..bd56139a291d5 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -711,6 +711,25 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 }
             }
 
+            "sched_getaffinity" => {
+                // FreeBSD supports it as well since 13.1 (as a wrapper of cpuset_getaffinity)
+                if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd") {
+                    throw_unsup_format!(
+                        "`sched_getaffinity` is not supported on {}",
+                        this.tcx.sess.target.os
+                    );
+                }
+                let [pid, cpusetsize, mask] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                this.read_scalar(pid)?.to_i32()?;
+                this.read_target_usize(cpusetsize)?;
+                this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?;
+                // FIXME: we just return an error; `num_cpus` then falls back to `sysconf`.
+                let einval = this.eval_libc("EINVAL");
+                this.set_last_error(einval)?;
+                this.write_scalar(Scalar::from_i32(-1), dest)?;
+            }
+
             // Platform-specific shims
             _ => {
                 let target_os = &*this.tcx.sess.target.os;
diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
index 6937e0f089ec8..1a233d73d5144 100644
--- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
@@ -197,17 +197,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                 getrandom(this, ptr, len, flags, dest)?;
             }
-            "sched_getaffinity" => {
-                let [pid, cpusetsize, mask] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                this.read_scalar(pid)?.to_i32()?;
-                this.read_target_usize(cpusetsize)?;
-                this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?;
-                // FIXME: we just return an error; `num_cpus` then falls back to `sysconf`.
-                let einval = this.eval_libc("EINVAL");
-                this.set_last_error(einval)?;
-                this.write_scalar(Scalar::from_i32(-1), dest)?;
-            }
 
             // Incomplete shims that we "stub out" just to get pre-main initialization code to work.
             // These shims are enabled only when the caller is in the standard library.

From 3d26a4182f9bb2d4489504d546da8a9ab37158d7 Mon Sep 17 00:00:00 2001
From: Oli Scherer <git-spam-no-reply9815368754983@oli-obk.de>
Date: Mon, 3 Jul 2023 09:01:24 +0000
Subject: [PATCH 02/30] Switch over to rustc's `tracing` crate instead of using
 our own `log` crate

---
 src/tools/miri/CONTRIBUTING.md                | 11 +++-
 src/tools/miri/Cargo.lock                     | 56 -------------------
 src/tools/miri/Cargo.toml                     |  2 -
 src/tools/miri/src/bin/miri.rs                | 10 +---
 src/tools/miri/src/borrow_tracker/mod.rs      |  1 -
 .../src/borrow_tracker/stacked_borrows/mod.rs |  2 -
 .../borrow_tracker/stacked_borrows/stack.rs   |  2 +-
 .../src/borrow_tracker/tree_borrows/mod.rs    |  2 -
 src/tools/miri/src/concurrency/data_race.rs   | 20 +++----
 src/tools/miri/src/concurrency/sync.rs        |  2 -
 src/tools/miri/src/concurrency/thread.rs      |  1 -
 src/tools/miri/src/diagnostics.rs             |  2 -
 src/tools/miri/src/eval.rs                    |  5 +-
 src/tools/miri/src/helpers.rs                 |  2 -
 src/tools/miri/src/intptrcast.rs              |  1 -
 src/tools/miri/src/lib.rs                     |  2 +
 src/tools/miri/src/operator.rs                |  2 -
 src/tools/miri/src/shims/foreign_items.rs     |  2 -
 src/tools/miri/src/shims/intrinsics/mod.rs    |  2 -
 src/tools/miri/src/shims/panic.rs             |  2 -
 src/tools/miri/src/shims/tls.rs               |  2 -
 .../miri/src/shims/unix/foreign_items.rs      |  2 -
 src/tools/miri/src/shims/unix/fs.rs           |  2 -
 23 files changed, 27 insertions(+), 108 deletions(-)

diff --git a/src/tools/miri/CONTRIBUTING.md b/src/tools/miri/CONTRIBUTING.md
index 7a49ff3372f5f..f2f3a642e0a03 100644
--- a/src/tools/miri/CONTRIBUTING.md
+++ b/src/tools/miri/CONTRIBUTING.md
@@ -78,6 +78,8 @@ custom target file, you might have to set `MIRI_NO_STD=1`.
 base directory, e.g. `./miri test fail` will run all compile-fail tests). These filters are passed
 to `cargo test`, so for multiple filers you need to use `./miri test -- FILTER1 FILTER2`.
 
+#### Fine grained logging
+
 You can get a trace of which MIR statements are being executed by setting the
 `MIRI_LOG` environment variable.  For example:
 
@@ -94,9 +96,16 @@ stacked borrows implementation:
 MIRI_LOG=rustc_mir::interpret=info,miri::stacked_borrows ./miri run tests/pass/vec.rs
 ```
 
-In addition, you can set `MIRI_BACKTRACE=1` to get a backtrace of where an
+Note that you will only get `info`, `warn` or `error` messages if you use a prebuilt compiler.
+In order to get `debug` and `trace` level messages, you need to build miri with a locally built
+compiler that has `debug=true` set in `config.toml`.
+
+#### Debugging error messages
+
+You can set `MIRI_BACKTRACE=1` to get a backtrace of where an
 evaluation error was originally raised.
 
+
 ### UI testing
 
 We use ui-testing in Miri, meaning we generate `.stderr` and `.stdout` files for the output
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index 8cd996d85645b..87dc51bd61255 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -273,19 +273,6 @@ version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
-[[package]]
-name = "env_logger"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece"
-dependencies = [
- "humantime",
- "is-terminal",
- "log",
- "regex",
- "termcolor",
-]
-
 [[package]]
 name = "errno"
 version = "0.3.8"
@@ -339,18 +326,6 @@ version = "0.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
 
-[[package]]
-name = "hermit-abi"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
-
-[[package]]
-name = "humantime"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
-
 [[package]]
 name = "indenter"
 version = "0.3.3"
@@ -388,17 +363,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "is-terminal"
-version = "0.4.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
-dependencies = [
- "hermit-abi",
- "rustix",
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "itoa"
 version = "1.0.10"
@@ -529,14 +493,12 @@ dependencies = [
  "aes",
  "colored",
  "ctrlc",
- "env_logger",
  "getrandom",
  "jemalloc-sys",
  "lazy_static",
  "libc",
  "libffi",
  "libloading",
- "log",
  "measureme",
  "rand",
  "regex",
@@ -875,15 +837,6 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
-[[package]]
-name = "termcolor"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449"
-dependencies = [
- "winapi-util",
-]
-
 [[package]]
 name = "thiserror"
 version = "1.0.56"
@@ -1034,15 +987,6 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
-[[package]]
-name = "winapi-util"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
-dependencies = [
- "winapi",
-]
-
 [[package]]
 name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index a65010b055b63..39122c847ce0d 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -19,8 +19,6 @@ doctest = false # and no doc tests
 
 [dependencies]
 getrandom = { version = "0.2", features = ["std"] }
-env_logger = "0.10"
-log = "0.4"
 rand = "0.8"
 smallvec = "1.7"
 aes = { version = "0.8.3", features = ["hazmat"] }
diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index e8e10f64ad295..c78444fa94ffe 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -13,14 +13,14 @@ extern crate rustc_log;
 extern crate rustc_metadata;
 extern crate rustc_middle;
 extern crate rustc_session;
+#[macro_use]
+extern crate tracing;
 
 use std::env::{self, VarError};
 use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::str::FromStr;
 
-use log::debug;
-
 use rustc_data_structures::sync::Lrc;
 use rustc_driver::Compilation;
 use rustc_hir::{self as hir, Node};
@@ -197,7 +197,7 @@ fn rustc_logger_config() -> rustc_log::LoggerConfig {
             // CTFE-related. Otherwise, we use it verbatim for `RUSTC_LOG`.
             // This way, if you set `MIRI_LOG=trace`, you get only the right parts of
             // rustc traced, but you can also do `MIRI_LOG=miri=trace,rustc_const_eval::interpret=debug`.
-            if log::Level::from_str(&var).is_ok() {
+            if tracing::Level::from_str(&var).is_ok() {
                 cfg.filter = Ok(format!(
                     "rustc_middle::mir::interpret={var},rustc_const_eval::interpret={var}"
                 ));
@@ -215,10 +215,6 @@ fn rustc_logger_config() -> rustc_log::LoggerConfig {
 }
 
 fn init_early_loggers(early_dcx: &EarlyDiagCtxt) {
-    // Note that our `extern crate log` is *not* the same as rustc's; as a result, we have to
-    // initialize them both, and we always initialize `miri`'s first.
-    let env = env_logger::Env::new().filter("MIRI_LOG").write_style("MIRI_LOG_STYLE");
-    env_logger::init_from_env(env);
     // Now for rustc. We only initialize `rustc` if the env var is set (so the user asked for it).
     // If it is not set, we avoid initializing now so that we can initialize later with our custom
     // settings, and *not* log anything for what happens before `miri` gets started.
diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs
index 74ff6ed4e0abe..c5cd78ad6658c 100644
--- a/src/tools/miri/src/borrow_tracker/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/mod.rs
@@ -2,7 +2,6 @@ use std::cell::RefCell;
 use std::fmt;
 use std::num::NonZeroU64;
 
-use log::trace;
 use smallvec::SmallVec;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
index 7740d383ee3f2..0fe422180f76f 100644
--- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
@@ -9,8 +9,6 @@ use std::cmp;
 use std::fmt::Write;
 use std::mem;
 
-use log::trace;
-
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::mir::{Mutability, RetagKind};
 use rustc_middle::ty::{self, layout::HasParamEnv, Ty};
diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs
index 291807c25eeb7..712c26a9afd7b 100644
--- a/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs
+++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs
@@ -385,7 +385,7 @@ impl<'tcx> Stack {
             let upper = unique_range.end;
             for item in &mut self.borrows[lower..upper] {
                 if item.perm() == Permission::Unique {
-                    log::trace!("access: disabling item {:?}", item);
+                    trace!("access: disabling item {:?}", item);
                     visitor(*item)?;
                     item.set_permission(Permission::Disabled);
                     // Also update all copies of this item in the cache.
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
index 0945a5292bb92..cc98286534107 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -1,5 +1,3 @@
-use log::trace;
-
 use rustc_target::abi::{Abi, Size};
 
 use crate::borrow_tracker::{AccessKind, GlobalState, GlobalStateInner, ProtectorKind};
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index 80d0402fc8758..a280448ae05d2 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -466,7 +466,7 @@ impl MemoryCellClocks {
         index: VectorIdx,
         access_size: Size,
     ) -> Result<(), DataRace> {
-        log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, thread_clocks);
+        trace!("Atomic read with vectors: {:#?} :: {:#?}", self, thread_clocks);
         let atomic = self.atomic_access(thread_clocks, access_size)?;
         atomic.read_vector.set_at_index(&thread_clocks.clock, index);
         // Make sure the last non-atomic write and all non-atomic reads were before this access.
@@ -485,7 +485,7 @@ impl MemoryCellClocks {
         index: VectorIdx,
         access_size: Size,
     ) -> Result<(), DataRace> {
-        log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, thread_clocks);
+        trace!("Atomic write with vectors: {:#?} :: {:#?}", self, thread_clocks);
         let atomic = self.atomic_access(thread_clocks, access_size)?;
         atomic.write_vector.set_at_index(&thread_clocks.clock, index);
         // Make sure the last non-atomic write and all non-atomic reads were before this access.
@@ -504,7 +504,7 @@ impl MemoryCellClocks {
         index: VectorIdx,
         current_span: Span,
     ) -> Result<(), DataRace> {
-        log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, thread_clocks);
+        trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, thread_clocks);
         if !current_span.is_dummy() {
             thread_clocks.clock[index].span = current_span;
         }
@@ -533,7 +533,7 @@ impl MemoryCellClocks {
         write_type: NaWriteType,
         current_span: Span,
     ) -> Result<(), DataRace> {
-        log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, thread_clocks);
+        trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, thread_clocks);
         if !current_span.is_dummy() {
             thread_clocks.clock[index].span = current_span;
         }
@@ -743,7 +743,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
                 &this.machine.threads,
                 current_span,
                 |index, mut clocks| {
-                    log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic);
+                    trace!("Atomic fence on {:?} with ordering {:?}", index, atomic);
 
                     // Apply data-race detection for the current fences
                     // this treats AcqRel and SeqCst as the same as an acquire
@@ -841,7 +841,7 @@ impl VClockAlloc {
     // Find an index, if one exists where the value
     // in `l` is greater than the value in `r`.
     fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
-        log::trace!("Find index where not {:?} <= {:?}", l, r);
+        trace!("Find index where not {:?} <= {:?}", l, r);
         let l_slice = l.as_slice();
         let r_slice = r.as_slice();
         l_slice
@@ -1270,7 +1270,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
                 // Load and log the atomic operation.
                 // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
                 let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
-                log::trace!(
+                trace!(
                     "Atomic op({}) with ordering {:?} on {:?} (size={})",
                     access.description(),
                     &atomic,
@@ -1311,11 +1311,11 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
                 )?;
 
                 // Log changes to atomic memory.
-                if log::log_enabled!(log::Level::Trace) {
+                if tracing::enabled!(tracing::Level::TRACE) {
                     for (_offset, mem_clocks) in
                         alloc_meta.alloc_ranges.borrow().iter(base_offset, size)
                     {
-                        log::trace!(
+                        trace!(
                             "Updated atomic memory({:?}, size={}) to {:#?}",
                             place.ptr(),
                             size.bytes(),
@@ -1530,7 +1530,7 @@ impl GlobalState {
             vector_info.push(thread)
         };
 
-        log::trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index);
+        trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index);
 
         // Mark the chosen vector index as in use by the thread.
         thread_info[thread].vector_index = Some(created_index);
diff --git a/src/tools/miri/src/concurrency/sync.rs b/src/tools/miri/src/concurrency/sync.rs
index b288b69e0cef9..42b9161afdb8c 100644
--- a/src/tools/miri/src/concurrency/sync.rs
+++ b/src/tools/miri/src/concurrency/sync.rs
@@ -2,8 +2,6 @@ use std::collections::{hash_map::Entry, VecDeque};
 use std::num::NonZeroU32;
 use std::ops::Not;
 
-use log::trace;
-
 use rustc_data_structures::fx::FxHashMap;
 use rustc_index::{Idx, IndexVec};
 use rustc_middle::ty::layout::TyAndLayout;
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index 754cfa4d2a827..64e1f3c5b5574 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -8,7 +8,6 @@ use std::task::Poll;
 use std::time::{Duration, SystemTime};
 
 use either::Either;
-use log::trace;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_hir::def_id::DefId;
diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index bf3284df5967a..7565bb580f946 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -1,8 +1,6 @@
 use std::fmt::{self, Write};
 use std::num::NonZeroU64;
 
-use log::trace;
-
 use rustc_errors::{DiagnosticBuilder, DiagnosticMessage, Level};
 use rustc_span::{SpanData, Symbol, DUMMY_SP};
 use rustc_target::abi::{Align, Size};
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index 6095b8842eb60..9bab9488e378e 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -7,9 +7,6 @@ use std::path::PathBuf;
 use std::task::Poll;
 use std::thread;
 
-use log::info;
-use rustc_middle::ty::Ty;
-
 use crate::concurrency::thread::TlsAllocAction;
 use crate::diagnostics::report_leaks;
 use rustc_data_structures::fx::FxHashSet;
@@ -18,7 +15,7 @@ use rustc_hir::def_id::DefId;
 use rustc_middle::ty::{
     self,
     layout::{LayoutCx, LayoutOf},
-    TyCtxt,
+    Ty, TyCtxt,
 };
 use rustc_target::spec::abi::Abi;
 
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index d6b1e1358086c..932a35d9bf047 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -3,8 +3,6 @@ use std::iter;
 use std::num::NonZeroUsize;
 use std::time::Duration;
 
-use log::trace;
-
 use rustc_apfloat::ieee::{Double, Single};
 use rustc_apfloat::Float;
 use rustc_hir::def::{DefKind, Namespace};
diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs
index 68c9a7660ebda..3fe127f973269 100644
--- a/src/tools/miri/src/intptrcast.rs
+++ b/src/tools/miri/src/intptrcast.rs
@@ -2,7 +2,6 @@ use std::cell::RefCell;
 use std::cmp::max;
 use std::collections::hash_map::Entry;
 
-use log::trace;
 use rand::Rng;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index 057b883a3bf9e..6fbf28a752040 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -60,6 +60,8 @@ extern crate rustc_middle;
 extern crate rustc_session;
 extern crate rustc_span;
 extern crate rustc_target;
+#[macro_use]
+extern crate tracing;
 
 // Necessary to pull in object code as the rest of the rustc crates are shipped only as rmeta
 // files.
diff --git a/src/tools/miri/src/operator.rs b/src/tools/miri/src/operator.rs
index 6f19dead2e9bf..d99be39177bcf 100644
--- a/src/tools/miri/src/operator.rs
+++ b/src/tools/miri/src/operator.rs
@@ -1,7 +1,5 @@
 use std::iter;
 
-use log::trace;
-
 use rand::{seq::IteratorRandom, Rng};
 use rustc_apfloat::{Float, FloatConvert};
 use rustc_middle::mir;
diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs
index a002f2aad057c..dd3e8efb6ef39 100644
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@@ -1,7 +1,5 @@
 use std::{collections::hash_map::Entry, io::Write, iter, path::Path};
 
-use log::trace;
-
 use rustc_apfloat::Float;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_hir::{
diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs
index e34fb118f7234..7a5a079c1cb41 100644
--- a/src/tools/miri/src/shims/intrinsics/mod.rs
+++ b/src/tools/miri/src/shims/intrinsics/mod.rs
@@ -3,8 +3,6 @@ mod simd;
 
 use std::iter;
 
-use log::trace;
-
 use rand::Rng;
 use rustc_apfloat::{Float, Round};
 use rustc_middle::ty::layout::LayoutOf;
diff --git a/src/tools/miri/src/shims/panic.rs b/src/tools/miri/src/shims/panic.rs
index 28652c25c2450..4c054d8dc8aea 100644
--- a/src/tools/miri/src/shims/panic.rs
+++ b/src/tools/miri/src/shims/panic.rs
@@ -11,8 +11,6 @@
 //!   gets popped *during unwinding*, we take the panic payload and store it according to the extra
 //!   metadata we remembered when pushing said frame.
 
-use log::trace;
-
 use rustc_ast::Mutability;
 use rustc_middle::{mir, ty};
 use rustc_span::Symbol;
diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs
index b319516c25b9e..84c1feb88e959 100644
--- a/src/tools/miri/src/shims/tls.rs
+++ b/src/tools/miri/src/shims/tls.rs
@@ -4,8 +4,6 @@ use std::collections::btree_map::Entry as BTreeEntry;
 use std::collections::BTreeMap;
 use std::task::Poll;
 
-use log::trace;
-
 use rustc_middle::ty;
 use rustc_target::abi::{HasDataLayout, Size};
 use rustc_target::spec::abi::Abi;
diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 35036ce078d37..30c1fe7add5a1 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -1,8 +1,6 @@
 use std::ffi::OsStr;
 use std::str;
 
-use log::trace;
-
 use rustc_middle::ty::layout::LayoutOf;
 use rustc_span::Symbol;
 use rustc_target::abi::{Align, Size};
diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs
index 53f975baa89a5..b141ca4a019c3 100644
--- a/src/tools/miri/src/shims/unix/fs.rs
+++ b/src/tools/miri/src/shims/unix/fs.rs
@@ -8,8 +8,6 @@ use std::io::{self, ErrorKind, IsTerminal, Read, Seek, SeekFrom, Write};
 use std::path::{Path, PathBuf};
 use std::time::SystemTime;
 
-use log::trace;
-
 use rustc_data_structures::fx::FxHashMap;
 use rustc_middle::ty::TyCtxt;
 use rustc_target::abi::Size;

From a6f1dbac79be41db8042920dd723f56d2c2e3154 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Tue, 30 Jan 2024 08:28:12 -0500
Subject: [PATCH 03/30] Implement the `mmap64` foreign item.

`mmap64` is like `mmap` but uses a 64-bit integer instead of `off_t` for
the offset parameter.
---
 .../miri/src/shims/unix/foreign_items.rs      |   7 ++
 src/tools/miri/src/shims/unix/mem.rs          |   3 +-
 src/tools/miri/tests/pass-dep/shims/mmap.rs   | 114 ++++++++++++++++++
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 35036ce078d37..0f992e957c2d9 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -262,9 +262,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
             "mmap" => {
                 let [addr, length, prot, flags, fd, offset] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?;
+                let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off_t").size)?;
                 let ptr = this.mmap(addr, length, prot, flags, fd, offset)?;
                 this.write_scalar(ptr, dest)?;
             }
+            "mmap64" => {
+                let [addr, length, prot, flags, fd, offset] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?;
+                let offset = this.read_scalar(offset)?.to_i64()?;
+                let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?;
+                this.write_scalar(ptr, dest)?;
+            }
             "munmap" => {
                 let [addr, length] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?;
                 let result = this.munmap(addr, length)?;
diff --git a/src/tools/miri/src/shims/unix/mem.rs b/src/tools/miri/src/shims/unix/mem.rs
index d7dc17fa89f13..d3470893dbbca 100644
--- a/src/tools/miri/src/shims/unix/mem.rs
+++ b/src/tools/miri/src/shims/unix/mem.rs
@@ -26,7 +26,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         prot: &OpTy<'tcx, Provenance>,
         flags: &OpTy<'tcx, Provenance>,
         fd: &OpTy<'tcx, Provenance>,
-        offset: &OpTy<'tcx, Provenance>,
+        offset: i128,
     ) -> InterpResult<'tcx, Scalar<Provenance>> {
         let this = self.eval_context_mut();
 
@@ -36,7 +36,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         let prot = this.read_scalar(prot)?.to_i32()?;
         let flags = this.read_scalar(flags)?.to_i32()?;
         let fd = this.read_scalar(fd)?.to_i32()?;
-        let offset = this.read_target_usize(offset)?;
 
         let map_private = this.eval_libc_i32("MAP_PRIVATE");
         let map_anonymous = this.eval_libc_i32("MAP_ANONYMOUS");
diff --git a/src/tools/miri/tests/pass-dep/shims/mmap.rs b/src/tools/miri/tests/pass-dep/shims/mmap.rs
index e19f54d0687df..08faf76c00d37 100644
--- a/src/tools/miri/tests/pass-dep/shims/mmap.rs
+++ b/src/tools/miri/tests/pass-dep/shims/mmap.rs
@@ -116,6 +116,118 @@ fn test_mmap() {
     assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
 }
 
+#[cfg(target_os = "linux")]
+fn test_mmap64() {
+    let page_size = page_size::get();
+    let ptr = unsafe {
+        libc::mmap64(
+            ptr::null_mut(),
+            page_size,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+            -1,
+            0,
+        )
+    };
+    assert!(!ptr.is_null());
+
+    // Ensure that freshly mapped allocations are zeroed
+    let slice = unsafe { slice::from_raw_parts_mut(ptr as *mut u8, page_size) };
+    assert!(slice.iter().all(|b| *b == 0));
+
+    // Do some writes, make sure they worked
+    for b in slice.iter_mut() {
+        *b = 1;
+    }
+    assert!(slice.iter().all(|b| *b == 1));
+
+    // Ensure that we can munmap
+    let res = unsafe { libc::munmap(ptr, page_size) };
+    assert_eq!(res, 0i32);
+
+    // Test all of our error conditions
+    let ptr = unsafe {
+        libc::mmap64(
+            ptr::null_mut(),
+            page_size,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_PRIVATE | libc::MAP_SHARED, // Can't be both private and shared
+            -1,
+            0,
+        )
+    };
+    assert_eq!(ptr, libc::MAP_FAILED);
+    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
+
+    let ptr = unsafe {
+        libc::mmap64(
+            ptr::null_mut(),
+            0, // Can't map no memory
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+            -1,
+            0,
+        )
+    };
+    assert_eq!(ptr, libc::MAP_FAILED);
+    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
+
+    let ptr = unsafe {
+        libc::mmap64(
+            ptr::invalid_mut(page_size * 64),
+            page_size,
+            libc::PROT_READ | libc::PROT_WRITE,
+            // We don't support MAP_FIXED
+            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
+            -1,
+            0,
+        )
+    };
+    assert_eq!(ptr, libc::MAP_FAILED);
+    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::ENOTSUP);
+
+    // We don't support protections other than read+write
+    for prot in [libc::PROT_NONE, libc::PROT_EXEC, libc::PROT_READ, libc::PROT_WRITE] {
+        let ptr = unsafe {
+            libc::mmap64(
+                ptr::null_mut(),
+                page_size,
+                prot,
+                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+                -1,
+                0,
+            )
+        };
+        assert_eq!(ptr, libc::MAP_FAILED);
+        assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::ENOTSUP);
+    }
+
+    // We report an error for mappings whose length cannot be rounded up to a multiple of
+    // the page size.
+    let ptr = unsafe {
+        libc::mmap64(
+            ptr::null_mut(),
+            usize::MAX - 1,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+            -1,
+            0,
+        )
+    };
+    assert_eq!(ptr, libc::MAP_FAILED);
+
+    // We report an error when trying to munmap an address which is not a multiple of the page size
+    let res = unsafe { libc::munmap(ptr::invalid_mut(1), page_size) };
+    assert_eq!(res, -1);
+    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
+
+    // We report an error when trying to munmap a length that cannot be rounded up to a multiple of
+    // the page size.
+    let res = unsafe { libc::munmap(ptr::invalid_mut(page_size), usize::MAX - 1) };
+    assert_eq!(res, -1);
+    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
+}
+
 #[cfg(target_os = "linux")]
 fn test_mremap() {
     let page_size = page_size::get();
@@ -165,5 +277,7 @@ fn test_mremap() {
 fn main() {
     test_mmap();
     #[cfg(target_os = "linux")]
+    test_mmap64();
+    #[cfg(target_os = "linux")]
     test_mremap();
 }

From e47bb909286ccf58059c2bec217e21c82432a185 Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Tue, 30 Jan 2024 19:00:16 +0000
Subject: [PATCH 04/30] add num_cpus test

---
 src/tools/miri/ci/ci.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tools/miri/ci/ci.sh b/src/tools/miri/ci/ci.sh
index 6bcc68ebf7ccd..9d2c3f362e6d1 100755
--- a/src/tools/miri/ci/ci.sh
+++ b/src/tools/miri/ci/ci.sh
@@ -121,8 +121,9 @@ case $HOST_TARGET in
     MIRI_TEST_TARGET=aarch64-apple-darwin run_tests
     MIRI_TEST_TARGET=i686-pc-windows-gnu run_tests
     # Some targets are only partially supported.
-    MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align
-    MIRI_TEST_TARGET=i686-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align
+    MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align num_cpus
+    MIRI_TEST_TARGET=i686-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align num_cpus
+
     MIRI_TEST_TARGET=aarch64-linux-android run_tests_minimal hello integer vec panic/panic
     MIRI_TEST_TARGET=wasm32-wasi run_tests_minimal no_std integer strings wasm
     MIRI_TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std integer strings wasm

From 63524ee2161dfe6a20f1a757b18e9872b50079bb Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Wed, 31 Jan 2024 04:54:35 +0000
Subject: [PATCH 05/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 6624672775f9d..e36763cc7ab31 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-dd2559e08e1530806740931037d6bb83ef956161
+80deabd0987201e1b8d060400f50e03309a0105e

From 24fece28f63311d26790524bcf581e32cb6cc644 Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Wed, 31 Jan 2024 05:02:33 +0000
Subject: [PATCH 06/30] fmt

---
 src/tools/miri/src/diagnostics.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index 92c58d48dc757..cb009fd5cfd18 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -102,10 +102,7 @@ impl MachineStopType for TerminationInfo {
     }
     fn add_args(
         self: Box<Self>,
-        _: &mut dyn FnMut(
-            std::borrow::Cow<'static, str>,
-            rustc_errors::DiagnosticArgValue,
-        ),
+        _: &mut dyn FnMut(std::borrow::Cow<'static, str>, rustc_errors::DiagnosticArgValue),
     ) {
     }
 }

From 5587a37384f3fc0e266196f4b009d442a7e0552d Mon Sep 17 00:00:00 2001
From: Tommi Virtanen <tv@eagain.net>
Date: Sun, 4 Feb 2024 13:24:36 -0700
Subject: [PATCH 07/30] Stop recommending cargo clean in README

Miri has used the `target/miri` subdirectory since 2021 to keep itself
separate from non-miri builds, so this should not be necessary.

See commit 6a18683d09f74ead9438aa7fdd323b18821f7709

Since the items are no longer a sequence of steps to do in
order ("first, make sure that ..."), switch to an unordered list while
we're at it.

Closes #3289
---
 src/tools/miri/README.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md
index 6695f123c7836..60bf07b1736e4 100644
--- a/src/tools/miri/README.md
+++ b/src/tools/miri/README.md
@@ -108,11 +108,8 @@ assume the right toolchain is pinned via `rustup override set nightly` or
 
 Now you can run your project in Miri:
 
-1. Run `cargo clean` to eliminate any cached dependencies. Miri needs your
-   dependencies to be compiled the right way, that would not happen if they have
-   previously already been compiled.
-2. To run all tests in your project through Miri, use `cargo miri test`.
-3. If you have a binary project, you can run it through Miri using `cargo miri run`.
+- To run all tests in your project through Miri, use `cargo miri test`.
+- If you have a binary project, you can run it through Miri using `cargo miri run`.
 
 The first time you run Miri, it will perform some extra setup and install some
 dependencies. It will ask you for confirmation before installing anything.

From eaa2da5d921225266d714d8d28b87cc8d131bcae Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Tue, 6 Feb 2024 05:10:11 +0000
Subject: [PATCH 08/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index e36763cc7ab31..f3e10689ff1d4 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-80deabd0987201e1b8d060400f50e03309a0105e
+f3b9d47a46c6d237665801155aa3e26c5f49958b

From cab1cc48ec25f1b2531a271abd2a6c04087dcc61 Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Thu, 8 Feb 2024 05:06:15 +0000
Subject: [PATCH 09/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index f3e10689ff1d4..17e50400cec10 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-f3b9d47a46c6d237665801155aa3e26c5f49958b
+384b02c0825cefa59f2e8a99a33d9a5344959079

From 964a576dc026f4805df652776abf8a22664815be Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Thu, 8 Feb 2024 05:14:42 +0000
Subject: [PATCH 10/30] fmt

---
 .../issue-miri-3288-ice-symbolic-alignment-extern-static.rs    | 3 +--
 src/tools/miri/tests/pass/align_offset_symbolic.rs             | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs b/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs
index 446040749822e..fef5a6cddb91d 100644
--- a/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs
+++ b/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs
@@ -4,8 +4,7 @@ extern "C" {
     static _dispatch_queue_attr_concurrent: [u8; 0];
 }
 
-static DISPATCH_QUEUE_CONCURRENT: &'static [u8; 0] =
-    unsafe { &_dispatch_queue_attr_concurrent };
+static DISPATCH_QUEUE_CONCURRENT: &'static [u8; 0] = unsafe { &_dispatch_queue_attr_concurrent };
 
 fn main() {
     let _val = *DISPATCH_QUEUE_CONCURRENT; //~ERROR: is not supported
diff --git a/src/tools/miri/tests/pass/align_offset_symbolic.rs b/src/tools/miri/tests/pass/align_offset_symbolic.rs
index e96f11b1efa72..ac28c63e08110 100644
--- a/src/tools/miri/tests/pass/align_offset_symbolic.rs
+++ b/src/tools/miri/tests/pass/align_offset_symbolic.rs
@@ -113,7 +113,7 @@ fn vtable() {
 
     let ptr: &dyn Send = &0;
     let parts: (*const (), *const u8) = unsafe { mem::transmute(ptr) };
-    let vtable = parts.1 ;
+    let vtable = parts.1;
     let offset = vtable.align_offset(mem::align_of::<TWOPTR>());
     let _vtable_aligned = vtable.wrapping_add(offset) as *const [TWOPTR; 0];
     // FIXME: we can't actually do the access since vtable pointers act like zero-sized allocations.

From d4b8ed007376f869f69c389c900d5fb3a9c4b5ac Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Sat, 10 Feb 2024 04:55:22 +0000
Subject: [PATCH 11/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 17e50400cec10..7d077a9c54785 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-384b02c0825cefa59f2e8a99a33d9a5344959079
+b5c46dc5426038a49c95398bce30eeb20ec421e2

From 45d5d4b1b3e728f2da0707062482d3bb4611586f Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Sat, 10 Feb 2024 05:03:42 +0000
Subject: [PATCH 12/30] fmt

---
 src/tools/miri/src/bin/miri.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index aaa2a2e270590..9319877472e24 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -4,7 +4,7 @@
     clippy::useless_format,
     clippy::field_reassign_with_default,
     rustc::diagnostic_outside_of_impl,
-    rustc::untranslatable_diagnostic,
+    rustc::untranslatable_diagnostic
 )]
 
 extern crate rustc_data_structures;

From aa406341b443b30401f863ba1fad05dcb83d4dd9 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Sat, 10 Feb 2024 08:10:04 -0800
Subject: [PATCH 13/30] Factor out the redundancy between `test_mmap` and
 `test_mmap64`.

---
 src/tools/miri/tests/pass-dep/shims/mmap.rs | 151 ++++----------------
 1 file changed, 24 insertions(+), 127 deletions(-)

diff --git a/src/tools/miri/tests/pass-dep/shims/mmap.rs b/src/tools/miri/tests/pass-dep/shims/mmap.rs
index 08faf76c00d37..7bbb9dd53cb87 100644
--- a/src/tools/miri/tests/pass-dep/shims/mmap.rs
+++ b/src/tools/miri/tests/pass-dep/shims/mmap.rs
@@ -5,128 +5,25 @@
 use std::io::Error;
 use std::{ptr, slice};
 
-fn test_mmap() {
+fn test_mmap<Offset: Default>(
+    mmap: unsafe extern "C" fn(
+        *mut libc::c_void,
+        libc::size_t,
+        libc::c_int,
+        libc::c_int,
+        libc::c_int,
+        Offset,
+    ) -> *mut libc::c_void,
+) {
     let page_size = page_size::get();
     let ptr = unsafe {
-        libc::mmap(
-            ptr::null_mut(),
-            page_size,
-            libc::PROT_READ | libc::PROT_WRITE,
-            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-            -1,
-            0,
-        )
-    };
-    assert!(!ptr.is_null());
-
-    // Ensure that freshly mapped allocations are zeroed
-    let slice = unsafe { slice::from_raw_parts_mut(ptr as *mut u8, page_size) };
-    assert!(slice.iter().all(|b| *b == 0));
-
-    // Do some writes, make sure they worked
-    for b in slice.iter_mut() {
-        *b = 1;
-    }
-    assert!(slice.iter().all(|b| *b == 1));
-
-    // Ensure that we can munmap
-    let res = unsafe { libc::munmap(ptr, page_size) };
-    assert_eq!(res, 0i32);
-
-    // Test all of our error conditions
-    let ptr = unsafe {
-        libc::mmap(
-            ptr::null_mut(),
-            page_size,
-            libc::PROT_READ | libc::PROT_WRITE,
-            libc::MAP_PRIVATE | libc::MAP_SHARED, // Can't be both private and shared
-            -1,
-            0,
-        )
-    };
-    assert_eq!(ptr, libc::MAP_FAILED);
-    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
-
-    let ptr = unsafe {
-        libc::mmap(
-            ptr::null_mut(),
-            0, // Can't map no memory
-            libc::PROT_READ | libc::PROT_WRITE,
-            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-            -1,
-            0,
-        )
-    };
-    assert_eq!(ptr, libc::MAP_FAILED);
-    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
-
-    let ptr = unsafe {
-        libc::mmap(
-            ptr::invalid_mut(page_size * 64),
-            page_size,
-            libc::PROT_READ | libc::PROT_WRITE,
-            // We don't support MAP_FIXED
-            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
-            -1,
-            0,
-        )
-    };
-    assert_eq!(ptr, libc::MAP_FAILED);
-    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::ENOTSUP);
-
-    // We don't support protections other than read+write
-    for prot in [libc::PROT_NONE, libc::PROT_EXEC, libc::PROT_READ, libc::PROT_WRITE] {
-        let ptr = unsafe {
-            libc::mmap(
-                ptr::null_mut(),
-                page_size,
-                prot,
-                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-                -1,
-                0,
-            )
-        };
-        assert_eq!(ptr, libc::MAP_FAILED);
-        assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::ENOTSUP);
-    }
-
-    // We report an error for mappings whose length cannot be rounded up to a multiple of
-    // the page size.
-    let ptr = unsafe {
-        libc::mmap(
-            ptr::null_mut(),
-            usize::MAX - 1,
-            libc::PROT_READ | libc::PROT_WRITE,
-            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-            -1,
-            0,
-        )
-    };
-    assert_eq!(ptr, libc::MAP_FAILED);
-
-    // We report an error when trying to munmap an address which is not a multiple of the page size
-    let res = unsafe { libc::munmap(ptr::invalid_mut(1), page_size) };
-    assert_eq!(res, -1);
-    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
-
-    // We report an error when trying to munmap a length that cannot be rounded up to a multiple of
-    // the page size.
-    let res = unsafe { libc::munmap(ptr::invalid_mut(page_size), usize::MAX - 1) };
-    assert_eq!(res, -1);
-    assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
-}
-
-#[cfg(target_os = "linux")]
-fn test_mmap64() {
-    let page_size = page_size::get();
-    let ptr = unsafe {
-        libc::mmap64(
+        mmap(
             ptr::null_mut(),
             page_size,
             libc::PROT_READ | libc::PROT_WRITE,
             libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
             -1,
-            0,
+            Default::default(),
         )
     };
     assert!(!ptr.is_null());
@@ -147,40 +44,40 @@ fn test_mmap64() {
 
     // Test all of our error conditions
     let ptr = unsafe {
-        libc::mmap64(
+        mmap(
             ptr::null_mut(),
             page_size,
             libc::PROT_READ | libc::PROT_WRITE,
             libc::MAP_PRIVATE | libc::MAP_SHARED, // Can't be both private and shared
             -1,
-            0,
+            Default::default(),
         )
     };
     assert_eq!(ptr, libc::MAP_FAILED);
     assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
 
     let ptr = unsafe {
-        libc::mmap64(
+        mmap(
             ptr::null_mut(),
             0, // Can't map no memory
             libc::PROT_READ | libc::PROT_WRITE,
             libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
             -1,
-            0,
+            Default::default(),
         )
     };
     assert_eq!(ptr, libc::MAP_FAILED);
     assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL);
 
     let ptr = unsafe {
-        libc::mmap64(
+        mmap(
             ptr::invalid_mut(page_size * 64),
             page_size,
             libc::PROT_READ | libc::PROT_WRITE,
             // We don't support MAP_FIXED
             libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
             -1,
-            0,
+            Default::default(),
         )
     };
     assert_eq!(ptr, libc::MAP_FAILED);
@@ -189,13 +86,13 @@ fn test_mmap64() {
     // We don't support protections other than read+write
     for prot in [libc::PROT_NONE, libc::PROT_EXEC, libc::PROT_READ, libc::PROT_WRITE] {
         let ptr = unsafe {
-            libc::mmap64(
+            mmap(
                 ptr::null_mut(),
                 page_size,
                 prot,
                 libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                 -1,
-                0,
+                Default::default(),
             )
         };
         assert_eq!(ptr, libc::MAP_FAILED);
@@ -205,13 +102,13 @@ fn test_mmap64() {
     // We report an error for mappings whose length cannot be rounded up to a multiple of
     // the page size.
     let ptr = unsafe {
-        libc::mmap64(
+        mmap(
             ptr::null_mut(),
             usize::MAX - 1,
             libc::PROT_READ | libc::PROT_WRITE,
             libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
             -1,
-            0,
+            Default::default(),
         )
     };
     assert_eq!(ptr, libc::MAP_FAILED);
@@ -275,9 +172,9 @@ fn test_mremap() {
 }
 
 fn main() {
-    test_mmap();
+    test_mmap(libc::mmap);
     #[cfg(target_os = "linux")]
-    test_mmap64();
+    test_mmap(libc::mmap64);
     #[cfg(target_os = "linux")]
     test_mremap();
 }

From 0913e227d3eb1eaa691191b4e13fb7eb2ee883fa Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Sun, 11 Feb 2024 04:56:53 +0000
Subject: [PATCH 14/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 7d077a9c54785..11d4766b93cc8 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-b5c46dc5426038a49c95398bce30eeb20ec421e2
+0cbef48150e1fab161b5fd147b57ceb3f9272a52

From 48bb2bf4e6bfc5c9ffc9cb3f34a9163c05ce2cb2 Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Sun, 11 Feb 2024 05:05:04 +0000
Subject: [PATCH 15/30] fmt

---
 src/tools/miri/src/diagnostics.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index 03f0bf9f327cc..7825673db00e9 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -285,7 +285,10 @@ pub fn report_error<'tcx, 'mir>(
                 ) =>
             {
                 ecx.handle_ice(); // print interpreter backtrace
-                bug!("This validation error should be impossible in Miri: {}", format_interp_error(ecx.tcx.dcx(), e));
+                bug!(
+                    "This validation error should be impossible in Miri: {}",
+                    format_interp_error(ecx.tcx.dcx(), e)
+                );
             }
             UndefinedBehavior(_) => "Undefined Behavior",
             ResourceExhaustion(_) => "resource exhaustion",
@@ -299,7 +302,10 @@ pub fn report_error<'tcx, 'mir>(
             ) => "post-monomorphization error",
             _ => {
                 ecx.handle_ice(); // print interpreter backtrace
-                bug!("This error should be impossible in Miri: {}", format_interp_error(ecx.tcx.dcx(), e));
+                bug!(
+                    "This error should be impossible in Miri: {}",
+                    format_interp_error(ecx.tcx.dcx(), e)
+                );
             }
         };
         #[rustfmt::skip]

From 438f43c7a0afdb54a4e443d7c60cdb3d32a84981 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Sun, 11 Feb 2024 05:09:27 -0800
Subject: [PATCH 16/30] Make the `mmap64` implementation Linux-specific.

`mmap64` is only defined on Linux, so move the implementation
to Linux-specific code.
---
 src/tools/miri/src/shims/unix/foreign_items.rs       | 6 ------
 src/tools/miri/src/shims/unix/linux/foreign_items.rs | 9 +++++++++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 0f992e957c2d9..87cfe2907bdad 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -266,12 +266,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 let ptr = this.mmap(addr, length, prot, flags, fd, offset)?;
                 this.write_scalar(ptr, dest)?;
             }
-            "mmap64" => {
-                let [addr, length, prot, flags, fd, offset] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?;
-                let offset = this.read_scalar(offset)?.to_i64()?;
-                let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?;
-                this.write_scalar(ptr, dest)?;
-            }
             "munmap" => {
                 let [addr, length] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?;
                 let result = this.munmap(addr, length)?;
diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
index 6937e0f089ec8..ee4e88e689b2b 100644
--- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
@@ -9,6 +9,7 @@ use shims::unix::fs::EvalContextExt as _;
 use shims::unix::linux::fd::EvalContextExt as _;
 use shims::unix::linux::mem::EvalContextExt as _;
 use shims::unix::linux::sync::futex;
+use shims::unix::mem::EvalContextExt as _;
 use shims::unix::sync::EvalContextExt as _;
 use shims::unix::thread::EvalContextExt as _;
 
@@ -217,6 +218,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 this.write_null(dest)?;
             }
 
+            "mmap64" => {
+                let [addr, length, prot, flags, fd, offset] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let offset = this.read_scalar(offset)?.to_i64()?;
+                let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?;
+                this.write_scalar(ptr, dest)?;
+            }
+
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         };
 

From 064ad45bf77006a9a6d2d9199eca87cc2a8ac94f Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Sun, 11 Feb 2024 05:26:53 -0800
Subject: [PATCH 17/30] Move `mmap64`'s implementation under "File related
 shims".

---
 .../miri/src/shims/unix/linux/foreign_items.rs   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
index ee4e88e689b2b..a05a6470a5db3 100644
--- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
@@ -44,6 +44,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 let result = this.linux_readdir64(dirp)?;
                 this.write_scalar(result, dest)?;
             }
+            "mmap64" => {
+                let [addr, length, prot, flags, fd, offset] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let offset = this.read_scalar(offset)?.to_i64()?;
+                let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?;
+                this.write_scalar(ptr, dest)?;
+            }
+
             // Linux-only
             "sync_file_range" => {
                 let [fd, offset, nbytes, flags] =
@@ -218,14 +226,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 this.write_null(dest)?;
             }
 
-            "mmap64" => {
-                let [addr, length, prot, flags, fd, offset] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                let offset = this.read_scalar(offset)?.to_i64()?;
-                let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?;
-                this.write_scalar(ptr, dest)?;
-            }
-
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         };
 

From b38d871eec14628e59d234ca7cc6e1e525008164 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Sun, 11 Feb 2024 17:42:56 +0100
Subject: [PATCH 18/30] add tests for imported_main

---
 src/tools/miri/tests/pass/imported_main.rs     | 8 ++++++++
 src/tools/miri/tests/pass/imported_main.stdout | 1 +
 2 files changed, 9 insertions(+)
 create mode 100644 src/tools/miri/tests/pass/imported_main.rs
 create mode 100644 src/tools/miri/tests/pass/imported_main.stdout

diff --git a/src/tools/miri/tests/pass/imported_main.rs b/src/tools/miri/tests/pass/imported_main.rs
new file mode 100644
index 0000000000000..32b39152f7839
--- /dev/null
+++ b/src/tools/miri/tests/pass/imported_main.rs
@@ -0,0 +1,8 @@
+#![feature(imported_main)]
+
+pub mod foo {
+    pub fn mymain() {
+        println!("Hello, world!");
+    }
+}
+use foo::mymain as main;
diff --git a/src/tools/miri/tests/pass/imported_main.stdout b/src/tools/miri/tests/pass/imported_main.stdout
new file mode 100644
index 0000000000000..af5626b4a114a
--- /dev/null
+++ b/src/tools/miri/tests/pass/imported_main.stdout
@@ -0,0 +1 @@
+Hello, world!

From ca7a4b55d7b0b3c59fdd6e4e148b7d98201be864 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Mon, 12 Feb 2024 13:15:40 +0100
Subject: [PATCH 19/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 11d4766b93cc8..2c98082bc1e0c 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-0cbef48150e1fab161b5fd147b57ceb3f9272a52
+b17491c8f6d555386104dfd82004c01bfef09c95

From fe4d3274f0402c7d526bebd5b66f42df649da8e1 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Mon, 12 Feb 2024 13:19:49 +0100
Subject: [PATCH 20/30] comment tweaks

---
 src/tools/miri/src/concurrency/sync.rs                      | 6 +++---
 .../tests/fail/enum-set-discriminant-niche-variant-wrong.rs | 6 +++---
 .../miri/tests/pass-dep/concurrency/libc_pthread_cond.rs    | 2 +-
 .../pass-dep/concurrency/libc_pthread_cond_isolated.rs      | 2 +-
 src/tools/miri/tests/pass-dep/shims/pthread-sync.rs         | 5 ++---
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/tools/miri/src/concurrency/sync.rs b/src/tools/miri/src/concurrency/sync.rs
index 42b9161afdb8c..5d79b9fab0dcf 100644
--- a/src/tools/miri/src/concurrency/sync.rs
+++ b/src/tools/miri/src/concurrency/sync.rs
@@ -70,7 +70,7 @@ struct Mutex {
     lock_count: usize,
     /// The queue of threads waiting for this mutex.
     queue: VecDeque<ThreadId>,
-    /// Data race handle, this tracks the happens-before
+    /// Data race handle. This tracks the happens-before
     /// relationship between each mutex access. It is
     /// released to during unlock and acquired from during
     /// locking, and therefore stores the clock of the last
@@ -92,7 +92,7 @@ struct RwLock {
     writer_queue: VecDeque<ThreadId>,
     /// The queue of reader threads waiting for this lock.
     reader_queue: VecDeque<ThreadId>,
-    /// Data race handle for writers, tracks the happens-before
+    /// Data race handle for writers. Tracks the happens-before
     /// ordering between each write access to a rwlock and is updated
     /// after a sequence of concurrent readers to track the happens-
     /// before ordering between the set of previous readers and
@@ -101,7 +101,7 @@ struct RwLock {
     /// lock or the joined clock of the set of last threads to release
     /// shared reader locks.
     data_race: VClock,
-    /// Data race handle for readers, this is temporary storage
+    /// Data race handle for readers. This is temporary storage
     /// for the combined happens-before ordering for between all
     /// concurrent readers and the next writer, and the value
     /// is stored to the main data_race variable once all
diff --git a/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs b/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs
index 7097aa0c43ae3..428f371ca51c7 100644
--- a/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs
+++ b/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs
@@ -4,11 +4,11 @@
 use std::intrinsics::mir::*;
 use std::num::NonZeroI32;
 
-// We define our own option type so that we can control the varian indices.
+// We define our own option type so that we can control the variant indices.
 #[allow(unused)]
 enum Option<T> {
-    None,
-    Some(T),
+    None,    // variant 0
+    Some(T), // variant 1
 }
 use Option::*;
 
diff --git a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs
index b0325f7d78e50..f362caa11dc59 100644
--- a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs
+++ b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs
@@ -22,7 +22,7 @@ fn test_timed_wait_timeout(clock_id: i32) {
         let mut now_mu: MaybeUninit<libc::timespec> = MaybeUninit::uninit();
         assert_eq!(libc::clock_gettime(clock_id, now_mu.as_mut_ptr()), 0);
         let now = now_mu.assume_init();
-        // Waiting for a second... mostly because waiting less requires mich more tricky arithmetic.
+        // Waiting for a second... mostly because waiting less requires much more tricky arithmetic.
         // FIXME: wait less.
         let timeout = libc::timespec { tv_sec: now.tv_sec + 1, tv_nsec: now.tv_nsec };
 
diff --git a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs
index 103ce44006d3a..66c0895a5dab0 100644
--- a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs
+++ b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs
@@ -21,7 +21,7 @@ fn test_timed_wait_timeout(clock_id: i32) {
         let mut now_mu: MaybeUninit<libc::timespec> = MaybeUninit::uninit();
         assert_eq!(libc::clock_gettime(clock_id, now_mu.as_mut_ptr()), 0);
         let now = now_mu.assume_init();
-        // Waiting for a second... mostly because waiting less requires mich more tricky arithmetic.
+        // Waiting for a second... mostly because waiting less requires much more tricky arithmetic.
         // FIXME: wait less.
         let timeout = libc::timespec { tv_sec: now.tv_sec + 1, tv_nsec: now.tv_nsec };
 
diff --git a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
index 4cc5b7d68a3cc..e812760f791c8 100644
--- a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
+++ b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
@@ -98,9 +98,8 @@ fn test_mutex_libc_static_initializer_recursive() {
     }
 }
 
-// Testing the behavior of std::sync::RwLock does not fully exercise the pthread rwlock shims, we
-// need to go a layer deeper and test the behavior of the libc functions, because
-// std::sys::unix::rwlock::RWLock itself keeps track of write_locked and num_readers.
+// std::sync::RwLock does not even used pthread_rwlock any more.
+// Do some smoke testing of the API surface.
 fn test_rwlock_libc_static_initializer() {
     let rw = std::cell::UnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER);
     unsafe {

From 781190f5b7bf0e8bd462dd5b7f5eba574a06fa08 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Mon, 12 Feb 2024 14:05:35 +0100
Subject: [PATCH 21/30] also test pthread_mutex/rwlock directly

turns out one of the synchronizations in rwlock_writer_unlock is unnecessary
---
 src/tools/miri/src/concurrency/sync.rs        |  11 +-
 .../miri/tests/pass-dep/shims/pthread-sync.rs | 129 +++++++++++++++++-
 src/tools/miri/tests/pass/concurrency/sync.rs |  27 +++-
 3 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/src/tools/miri/src/concurrency/sync.rs b/src/tools/miri/src/concurrency/sync.rs
index 5d79b9fab0dcf..6863119032544 100644
--- a/src/tools/miri/src/concurrency/sync.rs
+++ b/src/tools/miri/src/concurrency/sync.rs
@@ -110,6 +110,7 @@ struct RwLock {
     /// must load the clock of the last write and must not
     /// add happens-before orderings between shared reader
     /// locks.
+    /// This is only relevant when there is an active reader.
     data_race_reader: VClock,
 }
 
@@ -485,6 +486,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
             Entry::Vacant(_) => return false, // we did not even own this lock
         }
         if let Some(data_race) = &this.machine.data_race {
+            // Add this to the shared-release clock of all concurrent readers.
             data_race.validate_lock_release_shared(
                 &mut rwlock.data_race_reader,
                 reader,
@@ -539,20 +541,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
             }
             rwlock.writer = None;
             trace!("rwlock_writer_unlock: {:?} unlocked by {:?}", id, expected_writer);
-            // Release memory to both reader and writer vector clocks
-            //  since this writer happens-before both the union of readers once they are finished
-            //  and the next writer
+            // Release memory to next lock holder.
             if let Some(data_race) = &this.machine.data_race {
                 data_race.validate_lock_release(
                     &mut rwlock.data_race,
                     current_writer,
                     current_span,
                 );
-                data_race.validate_lock_release(
-                    &mut rwlock.data_race_reader,
-                    current_writer,
-                    current_span,
-                );
             }
             // The thread was a writer.
             //
diff --git a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
index e812760f791c8..077bbfff1645f 100644
--- a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
+++ b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs
@@ -1,24 +1,34 @@
 //@ignore-target-windows: No libc on Windows
+// We use `yield` to test specific interleavings, so disable automatic preemption.
+//@compile-flags: -Zmiri-preemption-rate=0
+#![feature(sync_unsafe_cell)]
+
+use std::cell::SyncUnsafeCell;
+use std::thread;
+use std::{mem, ptr};
 
 fn main() {
     test_mutex_libc_init_recursive();
     test_mutex_libc_init_normal();
     test_mutex_libc_init_errorcheck();
     test_rwlock_libc_static_initializer();
-
     #[cfg(target_os = "linux")]
     test_mutex_libc_static_initializer_recursive();
+
+    test_mutex();
+    check_rwlock_write();
+    check_rwlock_read_no_deadlock();
 }
 
 fn test_mutex_libc_init_recursive() {
     unsafe {
-        let mut attr: libc::pthread_mutexattr_t = std::mem::zeroed();
+        let mut attr: libc::pthread_mutexattr_t = mem::zeroed();
         assert_eq!(libc::pthread_mutexattr_init(&mut attr as *mut _), 0);
         assert_eq!(
             libc::pthread_mutexattr_settype(&mut attr as *mut _, libc::PTHREAD_MUTEX_RECURSIVE),
             0,
         );
-        let mut mutex: libc::pthread_mutex_t = std::mem::zeroed();
+        let mut mutex: libc::pthread_mutex_t = mem::zeroed();
         assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mut attr as *mut _), 0);
         assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0);
         assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), 0);
@@ -36,7 +46,7 @@ fn test_mutex_libc_init_recursive() {
 
 fn test_mutex_libc_init_normal() {
     unsafe {
-        let mut mutexattr: libc::pthread_mutexattr_t = std::mem::zeroed();
+        let mut mutexattr: libc::pthread_mutexattr_t = mem::zeroed();
         assert_eq!(
             libc::pthread_mutexattr_settype(&mut mutexattr as *mut _, 0x12345678),
             libc::EINVAL,
@@ -45,7 +55,7 @@ fn test_mutex_libc_init_normal() {
             libc::pthread_mutexattr_settype(&mut mutexattr as *mut _, libc::PTHREAD_MUTEX_NORMAL),
             0,
         );
-        let mut mutex: libc::pthread_mutex_t = std::mem::zeroed();
+        let mut mutex: libc::pthread_mutex_t = mem::zeroed();
         assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mutexattr as *const _), 0);
         assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0);
         assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), libc::EBUSY);
@@ -58,7 +68,7 @@ fn test_mutex_libc_init_normal() {
 
 fn test_mutex_libc_init_errorcheck() {
     unsafe {
-        let mut mutexattr: libc::pthread_mutexattr_t = std::mem::zeroed();
+        let mut mutexattr: libc::pthread_mutexattr_t = mem::zeroed();
         assert_eq!(
             libc::pthread_mutexattr_settype(
                 &mut mutexattr as *mut _,
@@ -66,7 +76,7 @@ fn test_mutex_libc_init_errorcheck() {
             ),
             0,
         );
-        let mut mutex: libc::pthread_mutex_t = std::mem::zeroed();
+        let mut mutex: libc::pthread_mutex_t = mem::zeroed();
         assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mutexattr as *const _), 0);
         assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0);
         assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), libc::EBUSY);
@@ -98,6 +108,111 @@ fn test_mutex_libc_static_initializer_recursive() {
     }
 }
 
+struct SendPtr<T> {
+    ptr: *mut T,
+}
+unsafe impl<T> Send for SendPtr<T> {}
+impl<T> Copy for SendPtr<T> {}
+impl<T> Clone for SendPtr<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+fn test_mutex() {
+    // Specifically *not* using `Arc` to make sure there is no synchronization apart from the mutex.
+    unsafe {
+        let data = SyncUnsafeCell::new((libc::PTHREAD_MUTEX_INITIALIZER, 0));
+        let ptr = SendPtr { ptr: data.get() };
+        let mut threads = Vec::new();
+
+        for _ in 0..3 {
+            let thread = thread::spawn(move || {
+                let ptr = ptr; // circumvent per-field closure capture
+                let mutexptr = ptr::addr_of_mut!((*ptr.ptr).0);
+                assert_eq!(libc::pthread_mutex_lock(mutexptr), 0);
+                thread::yield_now();
+                (*ptr.ptr).1 += 1;
+                assert_eq!(libc::pthread_mutex_unlock(mutexptr), 0);
+            });
+            threads.push(thread);
+        }
+
+        for thread in threads {
+            thread.join().unwrap();
+        }
+
+        let mutexptr = ptr::addr_of_mut!((*ptr.ptr).0);
+        assert_eq!(libc::pthread_mutex_trylock(mutexptr), 0);
+        assert_eq!((*ptr.ptr).1, 3);
+    }
+}
+
+fn check_rwlock_write() {
+    unsafe {
+        let data = SyncUnsafeCell::new((libc::PTHREAD_RWLOCK_INITIALIZER, 0));
+        let ptr = SendPtr { ptr: data.get() };
+        let mut threads = Vec::new();
+
+        for _ in 0..3 {
+            let thread = thread::spawn(move || {
+                let ptr = ptr; // circumvent per-field closure capture
+                let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0);
+                assert_eq!(libc::pthread_rwlock_wrlock(rwlockptr), 0);
+                thread::yield_now();
+                (*ptr.ptr).1 += 1;
+                assert_eq!(libc::pthread_rwlock_unlock(rwlockptr), 0);
+            });
+            threads.push(thread);
+
+            let readthread = thread::spawn(move || {
+                let ptr = ptr; // circumvent per-field closure capture
+                let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0);
+                assert_eq!(libc::pthread_rwlock_rdlock(rwlockptr), 0);
+                thread::yield_now();
+                let val = (*ptr.ptr).1;
+                assert!(val >= 0 && val <= 3);
+                assert_eq!(libc::pthread_rwlock_unlock(rwlockptr), 0);
+            });
+            threads.push(readthread);
+        }
+
+        for thread in threads {
+            thread.join().unwrap();
+        }
+
+        let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0);
+        assert_eq!(libc::pthread_rwlock_tryrdlock(rwlockptr), 0);
+        assert_eq!((*ptr.ptr).1, 3);
+    }
+}
+
+fn check_rwlock_read_no_deadlock() {
+    unsafe {
+        let l1 = SyncUnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER);
+        let l1 = SendPtr { ptr: l1.get() };
+        let l2 = SyncUnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER);
+        let l2 = SendPtr { ptr: l2.get() };
+
+        // acquire l1 and hold it until after the other thread is done
+        assert_eq!(libc::pthread_rwlock_rdlock(l1.ptr), 0);
+        let handle = thread::spawn(move || {
+            let l1 = l1; // circumvent per-field closure capture
+            let l2 = l2; // circumvent per-field closure capture
+            // acquire l2 before the other thread
+            assert_eq!(libc::pthread_rwlock_rdlock(l2.ptr), 0);
+            thread::yield_now();
+            assert_eq!(libc::pthread_rwlock_rdlock(l1.ptr), 0);
+            thread::yield_now();
+            assert_eq!(libc::pthread_rwlock_unlock(l1.ptr), 0);
+            assert_eq!(libc::pthread_rwlock_unlock(l2.ptr), 0);
+        });
+        thread::yield_now();
+        assert_eq!(libc::pthread_rwlock_rdlock(l2.ptr), 0);
+        handle.join().unwrap();
+    }
+}
+
 // std::sync::RwLock does not even used pthread_rwlock any more.
 // Do some smoke testing of the API surface.
 fn test_rwlock_libc_static_initializer() {
diff --git a/src/tools/miri/tests/pass/concurrency/sync.rs b/src/tools/miri/tests/pass/concurrency/sync.rs
index e93e617fd2620..1d48e5312d496 100644
--- a/src/tools/miri/tests/pass/concurrency/sync.rs
+++ b/src/tools/miri/tests/pass/concurrency/sync.rs
@@ -1,6 +1,7 @@
 //@revisions: stack tree
 //@[tree]compile-flags: -Zmiri-tree-borrows
-//@compile-flags: -Zmiri-disable-isolation -Zmiri-strict-provenance
+// We use `yield` to test specific interleavings, so disable automatic preemption.
+//@compile-flags: -Zmiri-disable-isolation -Zmiri-strict-provenance -Zmiri-preemption-rate=0
 
 use std::sync::{Arc, Barrier, Condvar, Mutex, Once, RwLock};
 use std::thread;
@@ -119,13 +120,25 @@ fn check_rwlock_write() {
     let mut threads = Vec::new();
 
     for _ in 0..3 {
-        let data = Arc::clone(&data);
-        let thread = thread::spawn(move || {
-            let mut data = data.write().unwrap();
-            thread::yield_now();
-            *data += 1;
+        let thread = thread::spawn({
+            let data = Arc::clone(&data);
+            move || {
+                let mut data = data.write().unwrap();
+                thread::yield_now();
+                *data += 1;
+            }
         });
         threads.push(thread);
+
+        let readthread = thread::spawn({
+            let data = Arc::clone(&data);
+            move || {
+                let data = data.read().unwrap();
+                thread::yield_now();
+                assert!(*data >= 0 && *data <= 3);
+            }
+        });
+        threads.push(readthread);
     }
 
     for thread in threads {
@@ -144,8 +157,10 @@ fn check_rwlock_read_no_deadlock() {
 
     let l1_copy = Arc::clone(&l1);
     let l2_copy = Arc::clone(&l2);
+    // acquire l1 and hold it until after the other thread is done
     let _guard1 = l1.read().unwrap();
     let handle = thread::spawn(move || {
+        // acquire l2 before the other thread
         let _guard2 = l2_copy.read().unwrap();
         thread::yield_now();
         let _guard1 = l1_copy.read().unwrap();

From 760e8d284241f95fd74e96f065055293e9698d79 Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Tue, 13 Feb 2024 05:14:22 +0000
Subject: [PATCH 22/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 2c98082bc1e0c..eca1a2335c803 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-b17491c8f6d555386104dfd82004c01bfef09c95
+d26b41711282042c4ea0c5733e7332b07cfa4933

From 43e9411db839747f650e1125ed06266566c20b65 Mon Sep 17 00:00:00 2001
From: Oli Scherer <git-spam-no-reply9815368754983@oli-obk.de>
Date: Tue, 13 Feb 2024 08:31:45 +0000
Subject: [PATCH 23/30] Prevent rustfmt from messing up experimental syntax

---
 src/tools/miri/tests/pass/async-closure.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tools/miri/tests/pass/async-closure.rs b/src/tools/miri/tests/pass/async-closure.rs
index 9b2fc2948bf45..e04acfc39cfbc 100644
--- a/src/tools/miri/tests/pass/async-closure.rs
+++ b/src/tools/miri/tests/pass/async-closure.rs
@@ -16,6 +16,7 @@ pub fn block_on<T>(fut: impl Future<Output = T>) -> T {
     }
 }
 
+#[rustfmt::skip]
 async fn call_once(f: impl async FnOnce(DropMe)) {
     f(DropMe("world")).await;
 }

From 6539a64d73849176ff327b28a55d7d6787f293ef Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Wed, 14 Feb 2024 19:08:31 +0100
Subject: [PATCH 24/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index eca1a2335c803..2748518e634df 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-d26b41711282042c4ea0c5733e7332b07cfa4933
+502ce8287bc3c86dca07acc38c5ff9431a6097be

From 17693eb54944377873deba31755e65ced8f24ea0 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Wed, 14 Feb 2024 19:09:17 +0100
Subject: [PATCH 25/30] remove no-longer needed rustfmt::skip

---
 src/tools/miri/tests/pass/async-closure.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tools/miri/tests/pass/async-closure.rs b/src/tools/miri/tests/pass/async-closure.rs
index e04acfc39cfbc..9b2fc2948bf45 100644
--- a/src/tools/miri/tests/pass/async-closure.rs
+++ b/src/tools/miri/tests/pass/async-closure.rs
@@ -16,7 +16,6 @@ pub fn block_on<T>(fut: impl Future<Output = T>) -> T {
     }
 }
 
-#[rustfmt::skip]
 async fn call_once(f: impl async FnOnce(DropMe)) {
     f(DropMe("world")).await;
 }

From 089eb6b11faee46ca2bd3cfdefacc0bd4b4b4c1d Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Fri, 16 Feb 2024 04:54:51 +0000
Subject: [PATCH 26/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 2748518e634df..2115e482c64a1 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-502ce8287bc3c86dca07acc38c5ff9431a6097be
+0f806a9812b62c36bdab08d33c14cf2d3ecf4355

From 840ca093e6e44588b7af0564756cd2384c9a6da9 Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Fri, 16 Feb 2024 05:03:16 +0000
Subject: [PATCH 27/30] fmt

---
 src/tools/miri/src/shims/x86/mod.rs   | 15 +++------------
 src/tools/miri/src/shims/x86/sse.rs   |  5 +----
 src/tools/miri/src/shims/x86/sse2.rs  | 10 ++--------
 src/tools/miri/src/shims/x86/sse41.rs |  5 +----
 4 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
index b24ea8aec84bd..115be1d6f2236 100644
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@@ -296,10 +296,7 @@ fn bin_op_simd_float_first<'tcx, F: rustc_apfloat::Float>(
     this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
 
     for i in 1..dest_len {
-        this.copy_op(
-            &this.project_index(&left, i)?,
-            &this.project_index(&dest, i)?,
-        )?;
+        this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
     }
 
     Ok(())
@@ -420,10 +417,7 @@ fn unary_op_ss<'tcx>(
     this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
 
     for i in 1..dest_len {
-        this.copy_op(
-            &this.project_index(&op, i)?,
-            &this.project_index(&dest, i)?,
-        )?;
+        this.copy_op(&this.project_index(&op, i)?, &this.project_index(&dest, i)?)?;
     }
 
     Ok(())
@@ -479,10 +473,7 @@ fn round_first<'tcx, F: rustc_apfloat::Float>(
     )?;
 
     for i in 1..dest_len {
-        this.copy_op(
-            &this.project_index(&left, i)?,
-            &this.project_index(&dest, i)?,
-        )?;
+        this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
     }
 
     Ok(())
diff --git a/src/tools/miri/src/shims/x86/sse.rs b/src/tools/miri/src/shims/x86/sse.rs
index 9fb947cb2a3dc..da0db92738faf 100644
--- a/src/tools/miri/src/shims/x86/sse.rs
+++ b/src/tools/miri/src/shims/x86/sse.rs
@@ -208,10 +208,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                 this.write_immediate(*res0, &dest0)?;
 
                 for i in 1..dest_len {
-                    this.copy_op(
-                        &this.project_index(&left, i)?,
-                        &this.project_index(&dest, i)?,
-                    )?;
+                    this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
                 }
             }
             _ => return Ok(EmulateForeignItemResult::NotSupported),
diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs
index e5c8267320a45..b34b93e373900 100644
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@@ -440,10 +440,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                 this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
 
                 for i in 1..dest_len {
-                    this.copy_op(
-                        &this.project_index(&op, i)?,
-                        &this.project_index(&dest, i)?,
-                    )?;
+                    this.copy_op(&this.project_index(&op, i)?, &this.project_index(&dest, i)?)?;
                 }
             }
             // Used to implement _mm_sqrt_pd functions.
@@ -580,10 +577,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
 
                 // Copy remianing from `left`
                 for i in 1..dest_len {
-                    this.copy_op(
-                        &this.project_index(&left, i)?,
-                        &this.project_index(&dest, i)?,
-                    )?;
+                    this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
                 }
             }
             // Used to implement the `_mm_pause` function.
diff --git a/src/tools/miri/src/shims/x86/sse41.rs b/src/tools/miri/src/shims/x86/sse41.rs
index 2abd10fa7a77d..32b1fe43c5837 100644
--- a/src/tools/miri/src/shims/x86/sse41.rs
+++ b/src/tools/miri/src/shims/x86/sse41.rs
@@ -57,10 +57,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                         this.write_immediate(*src_value, &dest)?;
                     } else {
                         // copy from `left`
-                        this.copy_op(
-                            &this.project_index(&left, i)?,
-                            &dest,
-                        )?;
+                        this.copy_op(&this.project_index(&left, i)?, &dest)?;
                     }
                 }
             }

From 524c16d38744a45d25b17542228e4f2f628f66ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= <eduardosm-dev@e64.io>
Date: Sun, 26 Nov 2023 11:27:41 +0100
Subject: [PATCH 28/30] Implement x86 AVX intrinsics

---
 src/tools/miri/src/shims/x86/avx.rs           |  417 ++++++
 src/tools/miri/src/shims/x86/mod.rs           |  208 ++-
 .../miri/tests/pass/intrinsics-x86-avx.rs     | 1269 +++++++++++++++++
 3 files changed, 1844 insertions(+), 50 deletions(-)
 create mode 100644 src/tools/miri/src/shims/x86/avx.rs

diff --git a/src/tools/miri/src/shims/x86/avx.rs b/src/tools/miri/src/shims/x86/avx.rs
new file mode 100644
index 0000000000000..65de1607595be
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/avx.rs
@@ -0,0 +1,417 @@
+use rustc_apfloat::{ieee::Double, ieee::Single};
+use rustc_middle::mir;
+use rustc_middle::ty::layout::LayoutOf as _;
+use rustc_middle::ty::Ty;
+use rustc_span::Symbol;
+use rustc_target::spec::abi::Abi;
+
+use super::{
+    bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op,
+    round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, FloatBinOp, FloatUnaryOp,
+};
+use crate::*;
+use shims::foreign_items::EmulateForeignItemResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_avx_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
+        let this = self.eval_context_mut();
+        this.expect_target_feature_for_intrinsic(link_name, "avx")?;
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement _mm256_min_ps and _mm256_max_ps functions.
+            // Note that the semantics are a bit different from Rust simd_min
+            // and simd_max intrinsics regarding handling of NaN and -0.0: Rust
+            // matches the IEEE min/max operations, while x86 has different
+            // semantics.
+            "min.ps.256" | "max.ps.256" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which = match unprefixed_name {
+                    "min.ps.256" => FloatBinOp::Min,
+                    "max.ps.256" => FloatBinOp::Max,
+                    _ => unreachable!(),
+                };
+
+                bin_op_simd_float_all::<Single>(this, which, left, right, dest)?;
+            }
+            // Used to implement _mm256_min_pd and _mm256_max_pd functions.
+            "min.pd.256" | "max.pd.256" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which = match unprefixed_name {
+                    "min.pd.256" => FloatBinOp::Min,
+                    "max.pd.256" => FloatBinOp::Max,
+                    _ => unreachable!(),
+                };
+
+                bin_op_simd_float_all::<Double>(this, which, left, right, dest)?;
+            }
+            // Used to implement the _mm256_round_ps function.
+            // Rounds the elements of `op` according to `rounding`.
+            "round.ps.256" => {
+                let [op, rounding] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                round_all::<rustc_apfloat::ieee::Single>(this, op, rounding, dest)?;
+            }
+            // Used to implement the _mm256_round_pd function.
+            // Rounds the elements of `op` according to `rounding`.
+            "round.pd.256" => {
+                let [op, rounding] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                round_all::<rustc_apfloat::ieee::Double>(this, op, rounding, dest)?;
+            }
+            // Used to implement _mm256_{sqrt,rcp,rsqrt}_ps functions.
+            // Performs the operations on all components of `op`.
+            "sqrt.ps.256" | "rcp.ps.256" | "rsqrt.ps.256" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which = match unprefixed_name {
+                    "sqrt.ps.256" => FloatUnaryOp::Sqrt,
+                    "rcp.ps.256" => FloatUnaryOp::Rcp,
+                    "rsqrt.ps.256" => FloatUnaryOp::Rsqrt,
+                    _ => unreachable!(),
+                };
+
+                unary_op_ps(this, which, op, dest)?;
+            }
+            // Used to implement the _mm256_dp_ps function.
+            "dp.ps.256" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                conditional_dot_product(this, left, right, imm, dest)?;
+            }
+            // Used to implement the _mm256_h{add,sub}_p{s,d} functions.
+            // Horizontally add/subtract adjacent floating point values
+            // in `left` and `right`.
+            "hadd.ps.256" | "hadd.pd.256" | "hsub.ps.256" | "hsub.pd.256" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which = match unprefixed_name {
+                    "hadd.ps.256" | "hadd.pd.256" => mir::BinOp::Add,
+                    "hsub.ps.256" | "hsub.pd.256" => mir::BinOp::Sub,
+                    _ => unreachable!(),
+                };
+
+                horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
+            }
+            // Used to implement the _mm256_cmp_ps function.
+            // Performs a comparison operation on each component of `left`
+            // and `right`. For each component, returns 0 if false or u32::MAX
+            // if true.
+            "cmp.ps.256" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which =
+                    FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?;
+
+                bin_op_simd_float_all::<Single>(this, which, left, right, dest)?;
+            }
+            // Used to implement the _mm256_cmp_pd function.
+            // Performs a comparison operation on each component of `left`
+            // and `right`. For each component, returns 0 if false or u64::MAX
+            // if true.
+            "cmp.pd.256" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which =
+                    FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?;
+
+                bin_op_simd_float_all::<Double>(this, which, left, right, dest)?;
+            }
+            // Used to implement the _mm256_cvtps_epi32, _mm256_cvttps_epi32, _mm256_cvtpd_epi32
+            // and _mm256_cvttpd_epi32 functions.
+            // Converts packed f32/f64 to packed i32.
+            "cvt.ps2dq.256" | "cvtt.ps2dq.256" | "cvt.pd2dq.256" | "cvtt.pd2dq.256" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let rnd = match unprefixed_name {
+                    // "current SSE rounding mode", assume nearest
+                    "cvt.ps2dq.256" | "cvt.pd2dq.256" => rustc_apfloat::Round::NearestTiesToEven,
+                    // always truncate
+                    "cvtt.ps2dq.256" | "cvtt.pd2dq.256" => rustc_apfloat::Round::TowardZero,
+                    _ => unreachable!(),
+                };
+
+                convert_float_to_int(this, op, rnd, dest)?;
+            }
+            // Used to implement the _mm_permutevar_ps and _mm256_permutevar_ps functions.
+            // Shuffles 32-bit floats from `data` using `control` as control. Each 128-bit
+            // chunk is shuffled independently: this means that we view the vector as a
+            // sequence of 4-element arrays, and we shuffle each of these arrays, where
+            // `control` determines which element of the current `data` array is written.
+            "vpermilvar.ps" | "vpermilvar.ps.256" => {
+                let [data, control] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (data, data_len) = this.operand_to_simd(data)?;
+                let (control, control_len) = this.operand_to_simd(control)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, data_len);
+                assert_eq!(dest_len, control_len);
+
+                for i in 0..dest_len {
+                    let control = this.project_index(&control, i)?;
+
+                    // Each 128-bit chunk is shuffled independently. Since each chunk contains
+                    // four 32-bit elements, only two bits from `control` are used. To read the
+                    // value from the current chunk, add the destination index truncated to a multiple
+                    // of 4.
+                    let chunk_base = i & !0b11;
+                    let src_i = u64::from(this.read_scalar(&control)?.to_u32()? & 0b11)
+                        .checked_add(chunk_base)
+                        .unwrap();
+
+                    this.copy_op(
+                        &this.project_index(&data, src_i)?,
+                        &this.project_index(&dest, i)?,
+                    )?;
+                }
+            }
+            // Used to implement the _mm_permutevar_pd and _mm256_permutevar_pd functions.
+            // Shuffles 64-bit floats from `left` using `right` as control. Each 128-bit
+            // chunk is shuffled independently: this means that we view the vector as
+            // a sequence of 2-element arrays, and we shuffle each of these arrays,
+            // where `right` determines which element of the current `left` array is
+            // written.
+            "vpermilvar.pd" | "vpermilvar.pd.256" => {
+                let [data, control] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (data, data_len) = this.operand_to_simd(data)?;
+                let (control, control_len) = this.operand_to_simd(control)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, data_len);
+                assert_eq!(dest_len, control_len);
+
+                for i in 0..dest_len {
+                    let control = this.project_index(&control, i)?;
+
+                    // Each 128-bit chunk is shuffled independently. Since each chunk contains
+                    // two 64-bit elements, only the second bit from `control` is used (yes, the
+                    // second instead of the first, ask Intel). To read the value from the current
+                    // chunk, add the destination index truncated to a multiple of 2.
+                    let chunk_base = i & !1;
+                    let src_i = ((this.read_scalar(&control)?.to_u64()? >> 1) & 1)
+                        .checked_add(chunk_base)
+                        .unwrap();
+
+                    this.copy_op(
+                        &this.project_index(&data, src_i)?,
+                        &this.project_index(&dest, i)?,
+                    )?;
+                }
+            }
+            // Used to implement the _mm256_permute2f128_ps, _mm256_permute2f128_pd and
+            // _mm256_permute2f128_si256 functions. Regardless of the suffix in the name
+            // thay all can be considered to operate on vectors of 128-bit elements.
+            // For each 128-bit element of `dest`, copies one from `left`, `right` or
+            // zero, according to `imm`.
+            "vperm2f128.ps.256" | "vperm2f128.pd.256" | "vperm2f128.si.256" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                assert_eq!(dest.layout, left.layout);
+                assert_eq!(dest.layout, right.layout);
+                assert_eq!(dest.layout.size.bits(), 256);
+
+                // Transmute to `[u128; 2]` to process each 128-bit chunk independently.
+                let u128x2_layout =
+                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, 2))?;
+                let left = left.transmute(u128x2_layout, this)?;
+                let right = right.transmute(u128x2_layout, this)?;
+                let dest = dest.transmute(u128x2_layout, this)?;
+
+                let imm = this.read_scalar(imm)?.to_u8()?;
+
+                for i in 0..2 {
+                    let dest = this.project_index(&dest, i)?;
+
+                    let imm = match i {
+                        0 => imm & 0xF,
+                        1 => imm >> 4,
+                        _ => unreachable!(),
+                    };
+                    if imm & 0b100 != 0 {
+                        this.write_scalar(Scalar::from_u128(0), &dest)?;
+                    } else {
+                        let src = match imm {
+                            0b00 => this.project_index(&left, 0)?,
+                            0b01 => this.project_index(&left, 1)?,
+                            0b10 => this.project_index(&right, 0)?,
+                            0b11 => this.project_index(&right, 1)?,
+                            _ => unreachable!(),
+                        };
+                        this.copy_op(&src, &dest)?;
+                    }
+                }
+            }
+            // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps
+            // and _mm256_maskload_pd functions.
+            // For the element `i`, if the high bit of the `i`-th element of `mask`
+            // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
+            // loaded.
+            "maskload.ps" | "maskload.pd" | "maskload.ps.256" | "maskload.pd.256" => {
+                let [ptr, mask] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                mask_load(this, ptr, mask, dest)?;
+            }
+            // Used to implement the _mm_maskstore_ps, _mm_maskstore_pd, _mm256_maskstore_ps
+            // and _mm256_maskstore_pd functions.
+            // For the element `i`, if the high bit of the element `i`-th of `mask`
+            // is one, it is stored into `ptr.wapping_add(i)`.
+            // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
+            "maskstore.ps" | "maskstore.pd" | "maskstore.ps.256" | "maskstore.pd.256" => {
+                let [ptr, mask, value] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                mask_store(this, ptr, mask, value)?;
+            }
+            // Used to implement the _mm256_lddqu_si256 function.
+            // Reads a 256-bit vector from an unaligned pointer. This intrinsic
+            // is expected to perform better than a regular unaligned read when
+            // the data crosses a cache line, but for Miri this is just a regular
+            // unaligned read.
+            "ldu.dq.256" => {
+                let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let src_ptr = this.read_pointer(src_ptr)?;
+                let dest = dest.force_mplace(this)?;
+
+                // Unaligned copy, which is what we want.
+                this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
+            }
+            // Used to implement the _mm256_testz_si256, _mm256_testc_si256 and
+            // _mm256_testnzc_si256 functions.
+            // Tests `op & mask == 0`, `op & mask == mask` or
+            // `op & mask != 0 && op & mask != mask`
+            "ptestz.256" | "ptestc.256" | "ptestnzc.256" => {
+                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
+                let res = match unprefixed_name {
+                    "ptestz.256" => all_zero,
+                    "ptestc.256" => masked_set,
+                    "ptestnzc.256" => !all_zero && !masked_set,
+                    _ => unreachable!(),
+                };
+
+                this.write_scalar(Scalar::from_i32(res.into()), dest)?;
+            }
+            // Used to implement the _mm256_testz_pd, _mm256_testc_pd, _mm256_testnzc_pd
+            // _mm_testz_pd, _mm_testc_pd, _mm_testnzc_pd, _mm256_testz_ps,
+            // _mm256_testc_ps, _mm256_testnzc_ps, _mm_testz_ps, _mm_testc_ps and
+            // _mm_testnzc_ps functions.
+            // Calculates two booleans:
+            // `direct`, which is true when the highest bit of each element of `op & mask` is zero.
+            // `negated`, which is true when the highest bit of each element of `!op & mask` is zero.
+            // Return `direct` (testz), `negated` (testc) or `!direct & !negated` (testnzc)
+            "vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestz.pd" | "vtestc.pd"
+            | "vtestnzc.pd" | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256"
+            | "vtestz.ps" | "vtestc.ps" | "vtestnzc.ps" => {
+                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (direct, negated) = test_high_bits_masked(this, op, mask)?;
+                let res = match unprefixed_name {
+                    "vtestz.pd.256" | "vtestz.pd" | "vtestz.ps.256" | "vtestz.ps" => direct,
+                    "vtestc.pd.256" | "vtestc.pd" | "vtestc.ps.256" | "vtestc.ps" => negated,
+                    "vtestnzc.pd.256" | "vtestnzc.pd" | "vtestnzc.ps.256" | "vtestnzc.ps" =>
+                        !direct && !negated,
+                    _ => unreachable!(),
+                };
+
+                this.write_scalar(Scalar::from_i32(res.into()), dest)?;
+            }
+            _ => return Ok(EmulateForeignItemResult::NotSupported),
+        }
+        Ok(EmulateForeignItemResult::NeedsJumping)
+    }
+}
+
+/// Conditionally loads from `ptr` according the high bit of each
+/// element of `mask`. `ptr` does not need to be aligned.
+fn mask_load<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    ptr: &OpTy<'tcx, Provenance>,
+    mask: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+) -> InterpResult<'tcx, ()> {
+    let (mask, mask_len) = this.operand_to_simd(mask)?;
+    let (dest, dest_len) = this.place_to_simd(dest)?;
+
+    assert_eq!(dest_len, mask_len);
+
+    let mask_item_size = mask.layout.field(this, 0).size;
+    let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
+
+    let ptr = this.read_pointer(ptr)?;
+    for i in 0..dest_len {
+        let mask = this.project_index(&mask, i)?;
+        let dest = this.project_index(&dest, i)?;
+
+        if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
+            // Size * u64 is implemented as always checked
+            #[allow(clippy::arithmetic_side_effects)]
+            let ptr = ptr.wrapping_offset(dest.layout.size * i, &this.tcx);
+            // Unaligned copy, which is what we want.
+            this.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
+        } else {
+            this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Conditionally stores into `ptr` according the high bit of each
+/// element of `mask`. `ptr` does not need to be aligned.
+fn mask_store<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    ptr: &OpTy<'tcx, Provenance>,
+    mask: &OpTy<'tcx, Provenance>,
+    value: &OpTy<'tcx, Provenance>,
+) -> InterpResult<'tcx, ()> {
+    let (mask, mask_len) = this.operand_to_simd(mask)?;
+    let (value, value_len) = this.operand_to_simd(value)?;
+
+    assert_eq!(value_len, mask_len);
+
+    let mask_item_size = mask.layout.field(this, 0).size;
+    let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
+
+    let ptr = this.read_pointer(ptr)?;
+    for i in 0..value_len {
+        let mask = this.project_index(&mask, i)?;
+        let value = this.project_index(&value, i)?;
+
+        if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
+            // Size * u64 is implemented as always checked
+            #[allow(clippy::arithmetic_side_effects)]
+            let ptr = ptr.wrapping_offset(value.layout.size * i, &this.tcx);
+            // Unaligned copy, which is what we want.
+            this.mem_copy(value.ptr(), ptr, value.layout.size, /*nonoverlapping*/ true)?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
index 115be1d6f2236..9cfee20014fc1 100644
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@@ -1,6 +1,8 @@
 use rand::Rng as _;
 
-use rustc_apfloat::{ieee::Single, Float as _};
+use rustc_apfloat::{ieee::Single, Float};
+use rustc_middle::ty::layout::LayoutOf as _;
+use rustc_middle::ty::Ty;
 use rustc_middle::{mir, ty};
 use rustc_span::Symbol;
 use rustc_target::abi::Size;
@@ -11,6 +13,7 @@ use helpers::bool_to_simd_element;
 use shims::foreign_items::EmulateForeignItemResult;
 
 mod aesni;
+mod avx;
 mod sse;
 mod sse2;
 mod sse3;
@@ -115,6 +118,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this, link_name, abi, args, dest,
                 );
             }
+            name if name.starts_with("avx.") => {
+                return avx::EvalContextExt::emulate_x86_avx_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
 
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
@@ -563,8 +571,65 @@ fn convert_float_to_int<'tcx>(
     Ok(())
 }
 
+/// Splits `left`, `right` and `dest` (which must be SIMD vectors)
+/// into 128-bit chuncks.
+///
+/// `left`, `right` and `dest` cannot have different types.
+///
+/// Returns a tuple where:
+/// * The first element is the number of 128-bit chunks (let's call it `N`).
+/// * The second element is the number of elements per chunk (let's call it `M`).
+/// * The third element is the `left` vector split into chunks, i.e, it's
+///   type is `[[T; M]; N]`.
+/// * The fourth element is the `right` vector split into chunks.
+/// * The fifth element is the `dest` vector split into chunks.
+fn split_simd_to_128bit_chunks<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    left: &OpTy<'tcx, Provenance>,
+    right: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+) -> InterpResult<
+    'tcx,
+    (u64, u64, MPlaceTy<'tcx, Provenance>, MPlaceTy<'tcx, Provenance>, MPlaceTy<'tcx, Provenance>),
+> {
+    assert_eq!(dest.layout, left.layout);
+    assert_eq!(dest.layout, right.layout);
+
+    let (left, left_len) = this.operand_to_simd(left)?;
+    let (right, right_len) = this.operand_to_simd(right)?;
+    let (dest, dest_len) = this.place_to_simd(dest)?;
+
+    assert_eq!(dest_len, left_len);
+    assert_eq!(dest_len, right_len);
+
+    assert_eq!(dest.layout.size.bits() % 128, 0);
+    let num_chunks = dest.layout.size.bits() / 128;
+    assert_eq!(dest_len.checked_rem(num_chunks), Some(0));
+    let items_per_chunk = dest_len.checked_div(num_chunks).unwrap();
+
+    // Transmute to `[[T; items_per_chunk]; num_chunks]`
+    let element_layout = left.layout.field(this, 0);
+    let chunked_layout = this.layout_of(Ty::new_array(
+        this.tcx.tcx,
+        Ty::new_array(this.tcx.tcx, element_layout.ty, items_per_chunk),
+        num_chunks,
+    ))?;
+    let left = left.transmute(chunked_layout, this)?;
+    let right = right.transmute(chunked_layout, this)?;
+    let dest = dest.transmute(chunked_layout, this)?;
+
+    Ok((num_chunks, items_per_chunk, left, right, dest))
+}
+
 /// Horizontaly performs `which` operation on adjacent values of
 /// `left` and `right` SIMD vectors and stores the result in `dest`.
+/// "Horizontal" means that the i-th output element is calculated
+/// from the elements 2*i and 2*i+1 of the concatenation of `left` and
+/// `right`.
+///
+/// Each 128-bit chunk is treated independently (i.e., the value for
+/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
+/// 128-bit chunks of `left` and `right`).
 fn horizontal_bin_op<'tcx>(
     this: &mut crate::MiriInterpCx<'_, 'tcx>,
     which: mir::BinOp,
@@ -573,32 +638,34 @@ fn horizontal_bin_op<'tcx>(
     right: &OpTy<'tcx, Provenance>,
     dest: &PlaceTy<'tcx, Provenance>,
 ) -> InterpResult<'tcx, ()> {
-    let (left, left_len) = this.operand_to_simd(left)?;
-    let (right, right_len) = this.operand_to_simd(right)?;
-    let (dest, dest_len) = this.place_to_simd(dest)?;
+    let (num_chunks, items_per_chunk, left, right, dest) =
+        split_simd_to_128bit_chunks(this, left, right, dest)?;
 
-    assert_eq!(dest_len, left_len);
-    assert_eq!(dest_len, right_len);
-    assert_eq!(dest_len % 2, 0);
+    let middle = items_per_chunk / 2;
+    for i in 0..num_chunks {
+        let left = this.project_index(&left, i)?;
+        let right = this.project_index(&right, i)?;
+        let dest = this.project_index(&dest, i)?;
 
-    let middle = dest_len / 2;
-    for i in 0..dest_len {
-        // `i` is the index in `dest`
-        // `j` is the index of the 2-item chunk in `src`
-        let (j, src) =
-            if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) };
-        // `base_i` is the index of the first item of the 2-item chunk in `src`
-        let base_i = j.checked_mul(2).unwrap();
-        let lhs = this.read_immediate(&this.project_index(src, base_i)?)?;
-        let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?;
-
-        let res = if saturating {
-            Immediate::from(this.saturating_arith(which, &lhs, &rhs)?)
-        } else {
-            *this.wrapping_binary_op(which, &lhs, &rhs)?
-        };
+        for j in 0..items_per_chunk {
+            // `j` is the index in `dest`
+            // `k` is the index of the 2-item chunk in `src`
+            let (k, src) =
+                if j < middle { (j, &left) } else { (j.checked_sub(middle).unwrap(), &right) };
+            // `base_i` is the index of the first item of the 2-item chunk in `src`
+            let base_i = k.checked_mul(2).unwrap();
+            let lhs = this.read_immediate(&this.project_index(src, base_i)?)?;
+            let rhs =
+                this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?;
+
+            let res = if saturating {
+                Immediate::from(this.saturating_arith(which, &lhs, &rhs)?)
+            } else {
+                *this.wrapping_binary_op(which, &lhs, &rhs)?
+            };
 
-        this.write_immediate(res, &this.project_index(&dest, i)?)?;
+            this.write_immediate(res, &this.project_index(&dest, j)?)?;
+        }
     }
 
     Ok(())
@@ -608,6 +675,10 @@ fn horizontal_bin_op<'tcx>(
 /// `left` and `right` using the high 4 bits in `imm`, sums the calculated
 /// products (up to 4), and conditionally stores the sum in `dest` using
 /// the low 4 bits of `imm`.
+///
+/// Each 128-bit chunk is treated independently (i.e., the value for
+/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
+/// 128-bit blocks of `left` and `right`).
 fn conditional_dot_product<'tcx>(
     this: &mut crate::MiriInterpCx<'_, 'tcx>,
     left: &OpTy<'tcx, Provenance>,
@@ -615,39 +686,43 @@ fn conditional_dot_product<'tcx>(
     imm: &OpTy<'tcx, Provenance>,
     dest: &PlaceTy<'tcx, Provenance>,
 ) -> InterpResult<'tcx, ()> {
-    let (left, left_len) = this.operand_to_simd(left)?;
-    let (right, right_len) = this.operand_to_simd(right)?;
-    let (dest, dest_len) = this.place_to_simd(dest)?;
+    let (num_chunks, items_per_chunk, left, right, dest) =
+        split_simd_to_128bit_chunks(this, left, right, dest)?;
 
-    assert_eq!(left_len, right_len);
-    assert!(dest_len <= 4);
+    let element_layout = left.layout.field(this, 0).field(this, 0);
+    assert!(items_per_chunk <= 4);
 
-    let imm = this.read_scalar(imm)?.to_u8()?;
+    // `imm` is a `u8` for SSE4.1 or an `i32` for AVX :/
+    let imm = this.read_scalar(imm)?.to_uint(imm.layout.size)?;
 
-    let element_layout = left.layout.field(this, 0);
+    for i in 0..num_chunks {
+        let left = this.project_index(&left, i)?;
+        let right = this.project_index(&right, i)?;
+        let dest = this.project_index(&dest, i)?;
 
-    // Calculate dot product
-    // Elements are floating point numbers, but we can use `from_int`
-    // because the representation of 0.0 is all zero bits.
-    let mut sum = ImmTy::from_int(0u8, element_layout);
-    for i in 0..left_len {
-        if imm & (1 << i.checked_add(4).unwrap()) != 0 {
-            let left = this.read_immediate(&this.project_index(&left, i)?)?;
-            let right = this.read_immediate(&this.project_index(&right, i)?)?;
-
-            let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
-            sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
+        // Calculate dot product
+        // Elements are floating point numbers, but we can use `from_int`
+        // for the initial value because the representation of 0.0 is all zero bits.
+        let mut sum = ImmTy::from_int(0u8, element_layout);
+        for j in 0..items_per_chunk {
+            if imm & (1 << j.checked_add(4).unwrap()) != 0 {
+                let left = this.read_immediate(&this.project_index(&left, j)?)?;
+                let right = this.read_immediate(&this.project_index(&right, j)?)?;
+
+                let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
+                sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
+            }
         }
-    }
 
-    // Write to destination (conditioned to imm)
-    for i in 0..dest_len {
-        let dest = this.project_index(&dest, i)?;
+        // Write to destination (conditioned to imm)
+        for j in 0..items_per_chunk {
+            let dest = this.project_index(&dest, j)?;
 
-        if imm & (1 << i) != 0 {
-            this.write_immediate(*sum, &dest)?;
-        } else {
-            this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
+            if imm & (1 << j) != 0 {
+                this.write_immediate(*sum, &dest)?;
+            } else {
+                this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
+            }
         }
     }
 
@@ -684,3 +759,36 @@ fn test_bits_masked<'tcx>(
 
     Ok((all_zero, masked_set))
 }
+
+/// Calculates two booleans.
+///
+/// The first is true when the highest bit of each element of `op & mask` is zero.
+/// The second is true when the highest bit of each element of `!op & mask` is zero.
+fn test_high_bits_masked<'tcx>(
+    this: &crate::MiriInterpCx<'_, 'tcx>,
+    op: &OpTy<'tcx, Provenance>,
+    mask: &OpTy<'tcx, Provenance>,
+) -> InterpResult<'tcx, (bool, bool)> {
+    assert_eq!(op.layout, mask.layout);
+
+    let (op, op_len) = this.operand_to_simd(op)?;
+    let (mask, mask_len) = this.operand_to_simd(mask)?;
+
+    assert_eq!(op_len, mask_len);
+
+    let high_bit_offset = op.layout.field(this, 0).size.bits().checked_sub(1).unwrap();
+
+    let mut direct = true;
+    let mut negated = true;
+    for i in 0..op_len {
+        let op = this.project_index(&op, i)?;
+        let mask = this.project_index(&mask, i)?;
+
+        let op = this.read_scalar(&op)?.to_uint(op.layout.size)?;
+        let mask = this.read_scalar(&mask)?.to_uint(mask.layout.size)?;
+        direct &= (op & mask) >> high_bit_offset == 0;
+        negated &= (!op & mask) >> high_bit_offset == 0;
+    }
+
+    Ok((direct, negated))
+}
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx.rs
index 933e3d4153ada..7d43cc596aedb 100644
--- a/src/tools/miri/tests/pass/intrinsics-x86-avx.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-avx.rs
@@ -25,6 +25,528 @@ fn main() {
 
 #[target_feature(enable = "avx")]
 unsafe fn test_avx() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx.rs
+
+    macro_rules! assert_approx_eq {
+        ($a:expr, $b:expr, $eps:expr) => {{
+            let (a, b) = (&$a, &$b);
+            assert!(
+                (*a - *b).abs() < $eps,
+                "assertion failed: `(left !== right)` \
+             (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)",
+                *a,
+                *b,
+                $eps,
+                (*a - *b).abs()
+            );
+        }};
+    }
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_max_pd() {
+        let a = _mm256_setr_pd(1., 4., 5., 8.);
+        let b = _mm256_setr_pd(2., 3., 6., 7.);
+        let r = _mm256_max_pd(a, b);
+        let e = _mm256_setr_pd(2., 4., 6., 8.);
+        assert_eq_m256d(r, e);
+        // > If the values being compared are both 0.0s (of either sign), the
+        // > value in the second operand (source operand) is returned.
+        let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
+        let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
+        let wu: [u64; 4] = transmute(w);
+        let xu: [u64; 4] = transmute(x);
+        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
+        assert_eq!(xu, [0u64; 4]);
+        // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
+        // > second operand (source operand), either a NaN or a valid
+        // > floating-point value, is written to the result.
+        let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
+        let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
+        let yf: [f64; 4] = transmute(y);
+        let zf: [f64; 4] = transmute(z);
+        assert_eq!(yf, [0.0; 4]);
+        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
+    }
+    test_mm256_max_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_max_ps() {
+        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
+        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
+        let r = _mm256_max_ps(a, b);
+        let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
+        assert_eq_m256(r, e);
+        // > If the values being compared are both 0.0s (of either sign), the
+        // > value in the second operand (source operand) is returned.
+        let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
+        let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
+        let wu: [u32; 8] = transmute(w);
+        let xu: [u32; 8] = transmute(x);
+        assert_eq!(wu, [0x8000_0000u32; 8]);
+        assert_eq!(xu, [0u32; 8]);
+        // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
+        // > second operand (source operand), either a NaN or a valid
+        // > floating-point value, is written to the result.
+        let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
+        let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
+        let yf: [f32; 8] = transmute(y);
+        let zf: [f32; 8] = transmute(z);
+        assert_eq!(yf, [0.0; 8]);
+        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
+    }
+    test_mm256_max_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_min_pd() {
+        let a = _mm256_setr_pd(1., 4., 5., 8.);
+        let b = _mm256_setr_pd(2., 3., 6., 7.);
+        let r = _mm256_min_pd(a, b);
+        let e = _mm256_setr_pd(1., 3., 5., 7.);
+        assert_eq_m256d(r, e);
+        // > If the values being compared are both 0.0s (of either sign), the
+        // > value in the second operand (source operand) is returned.
+        let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
+        let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
+        let wu: [u64; 4] = transmute(w);
+        let xu: [u64; 4] = transmute(x);
+        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
+        assert_eq!(xu, [0u64; 4]);
+        // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
+        // > second operand (source operand), either a NaN or a valid
+        // > floating-point value, is written to the result.
+        let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
+        let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
+        let yf: [f64; 4] = transmute(y);
+        let zf: [f64; 4] = transmute(z);
+        assert_eq!(yf, [0.0; 4]);
+        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
+    }
+    test_mm256_min_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_min_ps() {
+        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
+        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
+        let r = _mm256_min_ps(a, b);
+        let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
+        assert_eq_m256(r, e);
+        // > If the values being compared are both 0.0s (of either sign), the
+        // > value in the second operand (source operand) is returned.
+        let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
+        let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
+        let wu: [u32; 8] = transmute(w);
+        let xu: [u32; 8] = transmute(x);
+        assert_eq!(wu, [0x8000_0000u32; 8]);
+        assert_eq!(xu, [0u32; 8]);
+        // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
+        // > second operand (source operand), either a NaN or a valid
+        // > floating-point value, is written to the result.
+        let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
+        let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
+        let yf: [f32; 8] = transmute(y);
+        let zf: [f32; 8] = transmute(z);
+        assert_eq!(yf, [0.0; 8]);
+        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
+    }
+    test_mm256_min_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_nearest_f32() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f32, res: f32) {
+            let a = _mm256_set1_ps(x);
+            let e = _mm256_set1_ps(res);
+            let r = _mm256_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a);
+            assert_eq_m256(r, e);
+            // Assume round-to-nearest by default
+            let r = _mm256_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
+            assert_eq_m256(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -2.0);
+        test(-1.5, -2.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 2.0);
+        test(1.75, 2.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5);
+        let e = _mm256_setr_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0);
+        let r = _mm256_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a);
+        assert_eq_m256(r, e);
+        // Assume round-to-nearest by default
+        let r = _mm256_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
+        assert_eq_m256(r, e);
+    }
+    test_round_nearest_f32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_floor_f32() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f32, res: f32) {
+            let a = _mm256_set1_ps(x);
+            let e = _mm256_set1_ps(res);
+            let r = _mm256_floor_ps(a);
+            assert_eq_m256(r, e);
+            let r = _mm256_round_ps::<_MM_FROUND_TO_NEG_INF>(a);
+            assert_eq_m256(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -3.0);
+        test(-1.75, -2.0);
+        test(-1.5, -2.0);
+        test(-1.25, -2.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 1.0);
+        test(1.75, 1.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5);
+        let e = _mm256_setr_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
+        let r = _mm256_floor_ps(a);
+        assert_eq_m256(r, e);
+        let r = _mm256_round_ps::<_MM_FROUND_TO_NEG_INF>(a);
+        assert_eq_m256(r, e);
+    }
+    test_round_floor_f32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_ceil_f32() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f32, res: f32) {
+            let a = _mm256_set1_ps(x);
+            let e = _mm256_set1_ps(res);
+            let r = _mm256_ceil_ps(a);
+            assert_eq_m256(r, e);
+            let r = _mm256_round_ps::<_MM_FROUND_TO_POS_INF>(a);
+            assert_eq_m256(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -1.0);
+        test(-1.5, -1.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 2.0);
+        test(1.5, 2.0);
+        test(1.75, 2.0);
+        test(2.5, 3.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5);
+        let e = _mm256_setr_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0);
+        let r = _mm256_ceil_ps(a);
+        assert_eq_m256(r, e);
+        let r = _mm256_round_ps::<_MM_FROUND_TO_POS_INF>(a);
+        assert_eq_m256(r, e);
+    }
+    test_round_ceil_f32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_trunc_f32() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f32, res: f32) {
+            let a = _mm256_set1_ps(x);
+            let e = _mm256_set1_ps(res);
+            let r = _mm256_round_ps::<_MM_FROUND_TO_ZERO>(a);
+            assert_eq_m256(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -1.0);
+        test(-1.5, -1.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 1.0);
+        test(1.75, 1.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5);
+        let e = _mm256_setr_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
+        let r = _mm256_round_ps::<_MM_FROUND_TO_ZERO>(a);
+        assert_eq_m256(r, e);
+    }
+    test_round_trunc_f32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_nearest_f64() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f64, res: f64) {
+            let a = _mm256_set1_pd(x);
+            let e = _mm256_set1_pd(res);
+            let r = _mm256_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
+            assert_eq_m256d(r, e);
+            // Assume round-to-nearest by default
+            let r = _mm256_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
+            assert_eq_m256d(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -2.0);
+        test(-1.5, -2.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 2.0);
+        test(1.75, 2.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5);
+        let e = _mm256_setr_pd(2.0, 4.0, 6.0, 8.0);
+        let r = _mm256_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
+        assert_eq_m256d(r, e);
+        // Assume round-to-nearest by default
+        let r = _mm256_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
+        assert_eq_m256d(r, e);
+    }
+    test_round_nearest_f64();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_floor_f64() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f64, res: f64) {
+            let a = _mm256_set1_pd(x);
+            let e = _mm256_set1_pd(res);
+            let r = _mm256_floor_pd(a);
+            assert_eq_m256d(r, e);
+            let r = _mm256_round_pd::<_MM_FROUND_TO_NEG_INF>(a);
+            assert_eq_m256d(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -3.0);
+        test(-1.75, -2.0);
+        test(-1.5, -2.0);
+        test(-1.25, -2.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 1.0);
+        test(1.75, 1.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5);
+        let e = _mm256_setr_pd(1.0, 3.0, 5.0, 7.0);
+        let r = _mm256_floor_pd(a);
+        assert_eq_m256d(r, e);
+        let r = _mm256_round_pd::<_MM_FROUND_TO_NEG_INF>(a);
+        assert_eq_m256d(r, e);
+    }
+    test_round_floor_f64();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_ceil_f64() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f64, res: f64) {
+            let a = _mm256_set1_pd(x);
+            let e = _mm256_set1_pd(res);
+            let r = _mm256_ceil_pd(a);
+            assert_eq_m256d(r, e);
+            let r = _mm256_round_pd::<_MM_FROUND_TO_POS_INF>(a);
+            assert_eq_m256d(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -1.0);
+        test(-1.5, -1.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 2.0);
+        test(1.5, 2.0);
+        test(1.75, 2.0);
+        test(2.5, 3.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5);
+        let e = _mm256_setr_pd(2.0, 4.0, 6.0, 8.0);
+        let r = _mm256_ceil_pd(a);
+        assert_eq_m256d(r, e);
+        let r = _mm256_round_pd::<_MM_FROUND_TO_POS_INF>(a);
+        assert_eq_m256d(r, e);
+    }
+    test_round_ceil_f64();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_round_trunc_f64() {
+        #[target_feature(enable = "avx")]
+        unsafe fn test(x: f64, res: f64) {
+            let a = _mm256_set1_pd(x);
+            let e = _mm256_set1_pd(res);
+            let r = _mm256_round_pd::<_MM_FROUND_TO_ZERO>(a);
+            assert_eq_m256d(r, e);
+        }
+
+        // Test rounding direction
+        test(-2.5, -2.0);
+        test(-1.75, -1.0);
+        test(-1.5, -1.0);
+        test(-1.25, -1.0);
+        test(-1.0, -1.0);
+        test(0.0, 0.0);
+        test(1.0, 1.0);
+        test(1.25, 1.0);
+        test(1.5, 1.0);
+        test(1.75, 1.0);
+        test(2.5, 2.0);
+
+        // Test that each element is rounded
+        let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5);
+        let e = _mm256_setr_pd(1.0, 3.0, 5.0, 7.0);
+        let r = _mm256_round_pd::<_MM_FROUND_TO_ZERO>(a);
+        assert_eq_m256d(r, e);
+    }
+    test_round_trunc_f64();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_sqrt_ps() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let r = _mm256_sqrt_ps(a);
+        let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_sqrt_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_rcp_ps() {
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        let r = _mm256_rcp_ps(a);
+        #[rustfmt::skip]
+        let e = _mm256_setr_ps(
+            0.99975586, 0.49987793, 0.33325195, 0.24993896,
+            0.19995117, 0.16662598, 0.14282227, 0.12496948,
+        );
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 8] = transmute(r);
+        let e: [f32; 8] = transmute(e);
+        for i in 0..8 {
+            assert_approx_eq!(r[i], e[i], 2. * rel_err);
+        }
+    }
+    test_mm256_rcp_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_rsqrt_ps() {
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        let r = _mm256_rsqrt_ps(a);
+        #[rustfmt::skip]
+        let e = _mm256_setr_ps(
+            0.99975586, 0.7069092, 0.5772705, 0.49987793,
+            0.44714355, 0.40820313, 0.3779297, 0.3534546,
+        );
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 8] = transmute(r);
+        let e: [f32; 8] = transmute(e);
+        for i in 0..8 {
+            assert_approx_eq!(r[i], e[i], 2. * rel_err);
+        }
+    }
+    test_mm256_rsqrt_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_dp_ps() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let r = _mm256_dp_ps::<0xFF>(a, b);
+        let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_dp_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_hadd_pd() {
+        let a = _mm256_setr_pd(4., 9., 16., 25.);
+        let b = _mm256_setr_pd(4., 3., 2., 5.);
+        let r = _mm256_hadd_pd(a, b);
+        let e = _mm256_setr_pd(13., 7., 41., 7.);
+        assert_eq_m256d(r, e);
+
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_hadd_pd(a, b);
+        let e = _mm256_setr_pd(3., 11., 7., 15.);
+        assert_eq_m256d(r, e);
+    }
+    test_mm256_hadd_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_hadd_ps() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let r = _mm256_hadd_ps(a, b);
+        let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
+        assert_eq_m256(r, e);
+
+        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
+        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
+        let r = _mm256_hadd_ps(a, b);
+        let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_hadd_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_hsub_pd() {
+        let a = _mm256_setr_pd(4., 9., 16., 25.);
+        let b = _mm256_setr_pd(4., 3., 2., 5.);
+        let r = _mm256_hsub_pd(a, b);
+        let e = _mm256_setr_pd(-5., 1., -9., -3.);
+        assert_eq_m256d(r, e);
+
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_hsub_pd(a, b);
+        let e = _mm256_setr_pd(-1., -1., -1., -1.);
+        assert_eq_m256d(r, e);
+    }
+    test_mm256_hsub_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_hsub_ps() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let r = _mm256_hsub_ps(a, b);
+        let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
+        assert_eq_m256(r, e);
+
+        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
+        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
+        let r = _mm256_hsub_ps(a, b);
+        let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_hsub_ps();
+
     fn expected_cmp<F: PartialOrd>(imm: i32, lhs: F, rhs: F, if_t: F, if_f: F) -> F {
         let res = match imm {
             _CMP_EQ_OQ => lhs == rhs,
@@ -135,12 +657,54 @@ unsafe fn test_avx() {
         }
     }
 
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cmp_ps<const IMM: i32>() {
+        let values = [
+            (1.0, 1.0),
+            (0.0, 1.0),
+            (1.0, 0.0),
+            (f32::NAN, 0.0),
+            (0.0, f32::NAN),
+            (f32::NAN, f32::NAN),
+        ];
+
+        for (lhs, rhs) in values {
+            let a = _mm256_set1_ps(lhs);
+            let b = _mm256_set1_ps(rhs);
+            let r: [u32; 8] = transmute(_mm256_cmp_ps::<IMM>(a, b));
+            let e: [u32; 8] = transmute(_mm256_set1_ps(expected_cmp_f32(IMM, lhs, rhs)));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cmp_pd<const IMM: i32>() {
+        let values = [
+            (1.0, 1.0),
+            (0.0, 1.0),
+            (1.0, 0.0),
+            (f64::NAN, 0.0),
+            (0.0, f64::NAN),
+            (f64::NAN, f64::NAN),
+        ];
+
+        for (lhs, rhs) in values {
+            let a = _mm256_set1_pd(lhs);
+            let b = _mm256_set1_pd(rhs);
+            let r: [u64; 4] = transmute(_mm256_cmp_pd::<IMM>(a, b));
+            let e: [u64; 4] = transmute(_mm256_set1_pd(expected_cmp_f64(IMM, lhs, rhs)));
+            assert_eq!(r, e);
+        }
+    }
+
     #[target_feature(enable = "avx")]
     unsafe fn test_cmp<const IMM: i32>() {
         test_mm_cmp_ss::<IMM>();
         test_mm_cmp_ps::<IMM>();
         test_mm_cmp_sd::<IMM>();
         test_mm_cmp_pd::<IMM>();
+        test_mm256_cmp_ps::<IMM>();
+        test_mm256_cmp_pd::<IMM>();
     }
 
     test_cmp::<_CMP_EQ_OQ>();
@@ -159,4 +723,709 @@ unsafe fn test_avx() {
     test_cmp::<_CMP_GE_OS>();
     test_cmp::<_CMP_GT_OS>();
     test_cmp::<_CMP_TRUE_US>();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cvtps_epi32() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let r = _mm256_cvtps_epi32(a);
+        let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
+        assert_eq_m256i(r, e);
+
+        let a = _mm256_setr_ps(
+            f32::NEG_INFINITY,
+            f32::INFINITY,
+            f32::MIN,
+            f32::MAX,
+            f32::NAN,
+            f32::NAN,
+            f32::NAN,
+            f32::NAN,
+        );
+        let r = _mm256_cvtps_epi32(a);
+        assert_eq_m256i(r, _mm256_set1_epi32(i32::MIN));
+    }
+    test_mm256_cvtps_epi32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cvttps_epi32() {
+        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
+        let r = _mm256_cvttps_epi32(a);
+        let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
+        assert_eq_m256i(r, e);
+
+        let a = _mm256_setr_ps(
+            f32::NEG_INFINITY,
+            f32::INFINITY,
+            f32::MIN,
+            f32::MAX,
+            f32::NAN,
+            f32::NAN,
+            f32::NAN,
+            f32::NAN,
+        );
+        let r = _mm256_cvttps_epi32(a);
+        assert_eq_m256i(r, _mm256_set1_epi32(i32::MIN));
+    }
+    test_mm256_cvttps_epi32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cvtpd_epi32() {
+        let a = _mm256_setr_pd(4., 9., 16., 25.);
+        let r = _mm256_cvtpd_epi32(a);
+        let e = _mm_setr_epi32(4, 9, 16, 25);
+        assert_eq_m128i(r, e);
+
+        let a = _mm256_setr_pd(f64::NEG_INFINITY, f64::INFINITY, f64::MIN, f64::MAX);
+        let r = _mm256_cvtpd_epi32(a);
+        assert_eq_m128i(r, _mm_set1_epi32(i32::MIN));
+    }
+    test_mm256_cvtpd_epi32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_cvttpd_epi32() {
+        let a = _mm256_setr_pd(4., 9., 16., 25.);
+        let r = _mm256_cvttpd_epi32(a);
+        let e = _mm_setr_epi32(4, 9, 16, 25);
+        assert_eq_m128i(r, e);
+
+        let a = _mm256_setr_pd(f64::NEG_INFINITY, f64::INFINITY, f64::MIN, f64::MAX);
+        let r = _mm256_cvttpd_epi32(a);
+        assert_eq_m128i(r, _mm_set1_epi32(i32::MIN));
+    }
+    test_mm256_cvttpd_epi32();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_permutevar_ps() {
+        let a = _mm_setr_ps(4., 3., 2., 5.);
+        let b = _mm_setr_epi32(1, 2, 3, 4);
+        let r = _mm_permutevar_ps(a, b);
+        let e = _mm_setr_ps(3., 2., 5., 4.);
+        assert_eq_m128(r, e);
+    }
+    test_mm_permutevar_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_permutevar_ps() {
+        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = _mm256_permutevar_ps(a, b);
+        let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_permutevar_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_permutevar_pd() {
+        let a = _mm_setr_pd(4., 3.);
+        let b = _mm_setr_epi64x(3, 0);
+        let r = _mm_permutevar_pd(a, b);
+        let e = _mm_setr_pd(3., 4.);
+        assert_eq_m128d(r, e);
+    }
+    test_mm_permutevar_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_permutevar_pd() {
+        let a = _mm256_setr_pd(4., 3., 2., 5.);
+        let b = _mm256_setr_epi64x(1, 2, 3, 4);
+        let r = _mm256_permutevar_pd(a, b);
+        let e = _mm256_setr_pd(4., 3., 5., 2.);
+        assert_eq_m256d(r, e);
+    }
+    test_mm256_permutevar_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_permute2f128_ps() {
+        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
+        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
+        let r = _mm256_permute2f128_ps::<0x13>(a, b);
+        let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
+        assert_eq_m256(r, e);
+
+        let r = _mm256_permute2f128_ps::<0x44>(a, b);
+        let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_permute2f128_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_permute2f128_pd() {
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_permute2f128_pd::<0x31>(a, b);
+        let e = _mm256_setr_pd(3., 4., 7., 8.);
+        assert_eq_m256d(r, e);
+
+        let r = _mm256_permute2f128_pd::<0x44>(a, b);
+        let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256d(r, e);
+    }
+    test_mm256_permute2f128_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_permute2f128_si256() {
+        let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
+        let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
+        let r = _mm256_permute2f128_si256::<0x20>(a, b);
+        let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        assert_eq_m256i(r, e);
+
+        let r = _mm256_permute2f128_si256::<0x44>(a, b);
+        let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m256i(r, e);
+    }
+    test_mm256_permute2f128_si256();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_maskload_ps() {
+        let a = &[1.0f32, 2., 3., 4.];
+        let mask = _mm_setr_epi32(0, !0, 0, !0);
+        let r = _mm_maskload_ps(a.as_ptr(), mask);
+        let e = _mm_setr_ps(0., 2., 0., 4.);
+        assert_eq_m128(r, e);
+
+        // Unaligned pointer
+        let a = Unaligned::new([1.0f32, 2., 3., 4.]);
+        let mask = _mm_setr_epi32(0, !0, 0, !0);
+        let r = _mm_maskload_ps(a.as_ptr().cast(), mask);
+        let e = _mm_setr_ps(0., 2., 0., 4.);
+        assert_eq_m128(r, e);
+
+        // Only loading first element, so slice can be short.
+        let a = &[2.0f32];
+        let mask = _mm_setr_epi32(!0, 0, 0, 0);
+        let r = _mm_maskload_ps(a.as_ptr(), mask);
+        let e = _mm_setr_ps(2.0, 0.0, 0.0, 0.0);
+        assert_eq_m128(r, e);
+
+        // Only loading last element, so slice can be short.
+        let a = &[2.0f32];
+        let mask = _mm_setr_epi32(0, 0, 0, !0);
+        let r = _mm_maskload_ps(a.as_ptr().wrapping_sub(3), mask);
+        let e = _mm_setr_ps(0.0, 0.0, 0.0, 2.0);
+        assert_eq_m128(r, e);
+    }
+    test_mm_maskload_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_maskload_pd() {
+        let a = &[1.0f64, 2.];
+        let mask = _mm_setr_epi64x(0, !0);
+        let r = _mm_maskload_pd(a.as_ptr(), mask);
+        let e = _mm_setr_pd(0., 2.);
+        assert_eq_m128d(r, e);
+
+        // Unaligned pointer
+        let a = Unaligned::new([1.0f64, 2.]);
+        let mask = _mm_setr_epi64x(0, !0);
+        let r = _mm_maskload_pd(a.as_ptr().cast(), mask);
+        let e = _mm_setr_pd(0., 2.);
+        assert_eq_m128d(r, e);
+
+        // Only loading first element, so slice can be short.
+        let a = &[2.0f64];
+        let mask = _mm_setr_epi64x(!0, 0);
+        let r = _mm_maskload_pd(a.as_ptr(), mask);
+        let e = _mm_setr_pd(2.0, 0.0);
+        assert_eq_m128d(r, e);
+
+        // Only loading last element, so slice can be short.
+        let a = &[2.0f64];
+        let mask = _mm_setr_epi64x(0, !0);
+        let r = _mm_maskload_pd(a.as_ptr().wrapping_sub(1), mask);
+        let e = _mm_setr_pd(0.0, 2.0);
+        assert_eq_m128d(r, e);
+    }
+    test_mm_maskload_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_maskload_ps() {
+        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
+        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
+        let r = _mm256_maskload_ps(a.as_ptr(), mask);
+        let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
+        assert_eq_m256(r, e);
+
+        // Unaligned pointer
+        let a = Unaligned::new([1.0f32, 2., 3., 4., 5., 6., 7., 8.]);
+        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
+        let r = _mm256_maskload_ps(a.as_ptr().cast(), mask);
+        let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
+        assert_eq_m256(r, e);
+
+        // Only loading first element, so slice can be short.
+        let a = &[2.0f32];
+        let mask = _mm256_setr_epi32(!0, 0, 0, 0, 0, 0, 0, 0);
+        let r = _mm256_maskload_ps(a.as_ptr(), mask);
+        let e = _mm256_setr_ps(2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256(r, e);
+
+        // Only loading last element, so slice can be short.
+        let a = &[2.0f32];
+        let mask = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, !0);
+        let r = _mm256_maskload_ps(a.as_ptr().wrapping_sub(7), mask);
+        let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
+        assert_eq_m256(r, e);
+    }
+    test_mm256_maskload_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_maskload_pd() {
+        let a = &[1.0f64, 2., 3., 4.];
+        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
+        let r = _mm256_maskload_pd(a.as_ptr(), mask);
+        let e = _mm256_setr_pd(0., 2., 0., 4.);
+        assert_eq_m256d(r, e);
+
+        // Unaligned pointer
+        let a = Unaligned::new([1.0f64, 2., 3., 4.]);
+        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
+        let r = _mm256_maskload_pd(a.as_ptr().cast(), mask);
+        let e = _mm256_setr_pd(0., 2., 0., 4.);
+        assert_eq_m256d(r, e);
+
+        // Only loading first element, so slice can be short.
+        let a = &[2.0f64];
+        let mask = _mm256_setr_epi64x(!0, 0, 0, 0);
+        let r = _mm256_maskload_pd(a.as_ptr(), mask);
+        let e = _mm256_setr_pd(2.0, 0.0, 0.0, 0.0);
+        assert_eq_m256d(r, e);
+
+        // Only loading last element, so slice can be short.
+        let a = &[2.0f64];
+        let mask = _mm256_setr_epi64x(0, 0, 0, !0);
+        let r = _mm256_maskload_pd(a.as_ptr().wrapping_sub(3), mask);
+        let e = _mm256_setr_pd(0.0, 0.0, 0.0, 2.0);
+        assert_eq_m256d(r, e);
+    }
+    test_mm256_maskload_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_maskstore_ps() {
+        let mut r = _mm_set1_ps(0.);
+        let mask = _mm_setr_epi32(0, !0, 0, !0);
+        let a = _mm_setr_ps(1., 2., 3., 4.);
+        _mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
+        let e = _mm_setr_ps(0., 2., 0., 4.);
+        assert_eq_m128(r, e);
+
+        // Unaligned pointer
+        let mut r = Unaligned::new([0.0f32; 4]);
+        let mask = _mm_setr_epi32(0, !0, 0, !0);
+        let a = _mm_setr_ps(1., 2., 3., 4.);
+        _mm_maskstore_ps(r.as_mut_ptr().cast(), mask, a);
+        let e = [0., 2., 0., 4.];
+        assert_eq!(r.read(), e);
+
+        // Only storing first element, so slice can be short.
+        let mut r = [0.0f32];
+        let mask = _mm_setr_epi32(!0, 0, 0, 0);
+        let a = _mm_setr_ps(1., 2., 3., 4.);
+        _mm_maskstore_ps(r.as_mut_ptr(), mask, a);
+        let e = [1.0f32];
+        assert_eq!(r, e);
+
+        // Only storing last element, so slice can be short.
+        let mut r = [0.0f32];
+        let mask = _mm_setr_epi32(0, 0, 0, !0);
+        let a = _mm_setr_ps(1., 2., 3., 4.);
+        _mm_maskstore_ps(r.as_mut_ptr().wrapping_sub(3), mask, a);
+        let e = [4.0f32];
+        assert_eq!(r, e);
+    }
+    test_mm_maskstore_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_maskstore_pd() {
+        let mut r = _mm_set1_pd(0.);
+        let mask = _mm_setr_epi64x(0, !0);
+        let a = _mm_setr_pd(1., 2.);
+        _mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
+        let e = _mm_setr_pd(0., 2.);
+        assert_eq_m128d(r, e);
+
+        // Unaligned pointer
+        let mut r = Unaligned::new([0.0f64; 2]);
+        let mask = _mm_setr_epi64x(0, !0);
+        let a = _mm_setr_pd(1., 2.);
+        _mm_maskstore_pd(r.as_mut_ptr().cast(), mask, a);
+        let e = [0., 2.];
+        assert_eq!(r.read(), e);
+
+        // Only storing first element, so slice can be short.
+        let mut r = [0.0f64];
+        let mask = _mm_setr_epi64x(!0, 0);
+        let a = _mm_setr_pd(1., 2.);
+        _mm_maskstore_pd(r.as_mut_ptr(), mask, a);
+        let e = [1.0f64];
+        assert_eq!(r, e);
+
+        // Only storing last element, so slice can be short.
+        let mut r = [0.0f64];
+        let mask = _mm_setr_epi64x(0, !0);
+        let a = _mm_setr_pd(1., 2.);
+        _mm_maskstore_pd(r.as_mut_ptr().wrapping_sub(1), mask, a);
+        let e = [2.0f64];
+        assert_eq!(r, e);
+    }
+    test_mm_maskstore_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_maskstore_ps() {
+        let mut r = _mm256_set1_ps(0.);
+        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        _mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
+        let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
+        assert_eq_m256(r, e);
+
+        // Unaligned pointer
+        let mut r = Unaligned::new([0.0f32; 8]);
+        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        _mm256_maskstore_ps(r.as_mut_ptr().cast(), mask, a);
+        let e = [0., 2., 0., 4., 0., 6., 0., 8.];
+        assert_eq!(r.read(), e);
+
+        // Only storing first element, so slice can be short.
+        let mut r = [0.0f32];
+        let mask = _mm256_setr_epi32(!0, 0, 0, 0, 0, 0, 0, 0);
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        _mm256_maskstore_ps(r.as_mut_ptr(), mask, a);
+        let e = [1.0f32];
+        assert_eq!(r, e);
+
+        // Only storing last element, so slice can be short.
+        let mut r = [0.0f32];
+        let mask = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, !0);
+        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+        _mm256_maskstore_ps(r.as_mut_ptr().wrapping_sub(7), mask, a);
+        let e = [8.0f32];
+        assert_eq!(r, e);
+    }
+    test_mm256_maskstore_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_maskstore_pd() {
+        let mut r = _mm256_set1_pd(0.);
+        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        _mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
+        let e = _mm256_setr_pd(0., 2., 0., 4.);
+        assert_eq_m256d(r, e);
+
+        // Unaligned pointer
+        let mut r = Unaligned::new([0.0f64; 4]);
+        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        _mm256_maskstore_pd(r.as_mut_ptr().cast(), mask, a);
+        let e = [0., 2., 0., 4.];
+        assert_eq!(r.read(), e);
+
+        // Only storing first element, so slice can be short.
+        let mut r = [0.0f64];
+        let mask = _mm256_setr_epi64x(!0, 0, 0, 0);
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        _mm256_maskstore_pd(r.as_mut_ptr(), mask, a);
+        let e = [1.0f64];
+        assert_eq!(r, e);
+
+        // Only storing last element, so slice can be short.
+        let mut r = [0.0f64];
+        let mask = _mm256_setr_epi64x(0, 0, 0, !0);
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        _mm256_maskstore_pd(r.as_mut_ptr().wrapping_sub(3), mask, a);
+        let e = [4.0f64];
+        assert_eq!(r, e);
+    }
+    test_mm256_maskstore_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_lddqu_si256() {
+        #[rustfmt::skip]
+        let a = _mm256_setr_epi8(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        let p = &a as *const _;
+        let r = _mm256_lddqu_si256(p);
+        #[rustfmt::skip]
+        let e = _mm256_setr_epi8(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        assert_eq_m256i(r, e);
+    }
+    test_mm256_lddqu_si256();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testz_si256() {
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let b = _mm256_setr_epi64x(5, 6, 7, 8);
+        let r = _mm256_testz_si256(a, b);
+        assert_eq!(r, 0);
+        let b = _mm256_set1_epi64x(0);
+        let r = _mm256_testz_si256(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm256_testz_si256();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testc_si256() {
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let b = _mm256_setr_epi64x(5, 6, 7, 8);
+        let r = _mm256_testc_si256(a, b);
+        assert_eq!(r, 0);
+        let b = _mm256_set1_epi64x(0);
+        let r = _mm256_testc_si256(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm256_testc_si256();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testnzc_si256() {
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let b = _mm256_setr_epi64x(5, 6, 7, 8);
+        let r = _mm256_testnzc_si256(a, b);
+        assert_eq!(r, 1);
+        let a = _mm256_setr_epi64x(0, 0, 0, 0);
+        let b = _mm256_setr_epi64x(0, 0, 0, 0);
+        let r = _mm256_testnzc_si256(a, b);
+        assert_eq!(r, 0);
+    }
+    test_mm256_testnzc_si256();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testz_pd() {
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_testz_pd(a, b);
+        assert_eq!(r, 1);
+        let a = _mm256_set1_pd(-1.);
+        let r = _mm256_testz_pd(a, a);
+        assert_eq!(r, 0);
+    }
+    test_mm256_testz_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testc_pd() {
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_testc_pd(a, b);
+        assert_eq!(r, 1);
+        let a = _mm256_set1_pd(1.);
+        let b = _mm256_set1_pd(-1.);
+        let r = _mm256_testc_pd(a, b);
+        assert_eq!(r, 0);
+    }
+    test_mm256_testc_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testnzc_pd() {
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let b = _mm256_setr_pd(5., 6., 7., 8.);
+        let r = _mm256_testnzc_pd(a, b);
+        assert_eq!(r, 0);
+        let a = _mm256_setr_pd(1., -1., -1., -1.);
+        let b = _mm256_setr_pd(-1., -1., 1., 1.);
+        let r = _mm256_testnzc_pd(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm256_testnzc_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testz_pd() {
+        let a = _mm_setr_pd(1., 2.);
+        let b = _mm_setr_pd(5., 6.);
+        let r = _mm_testz_pd(a, b);
+        assert_eq!(r, 1);
+        let a = _mm_set1_pd(-1.);
+        let r = _mm_testz_pd(a, a);
+        assert_eq!(r, 0);
+    }
+    test_mm_testz_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testc_pd() {
+        let a = _mm_setr_pd(1., 2.);
+        let b = _mm_setr_pd(5., 6.);
+        let r = _mm_testc_pd(a, b);
+        assert_eq!(r, 1);
+        let a = _mm_set1_pd(1.);
+        let b = _mm_set1_pd(-1.);
+        let r = _mm_testc_pd(a, b);
+        assert_eq!(r, 0);
+    }
+    test_mm_testc_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testnzc_pd() {
+        let a = _mm_setr_pd(1., 2.);
+        let b = _mm_setr_pd(5., 6.);
+        let r = _mm_testnzc_pd(a, b);
+        assert_eq!(r, 0);
+        let a = _mm_setr_pd(1., -1.);
+        let b = _mm_setr_pd(-1., -1.);
+        let r = _mm_testnzc_pd(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm_testnzc_pd();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testz_ps() {
+        let a = _mm256_set1_ps(1.);
+        let r = _mm256_testz_ps(a, a);
+        assert_eq!(r, 1);
+        let a = _mm256_set1_ps(-1.);
+        let r = _mm256_testz_ps(a, a);
+        assert_eq!(r, 0);
+    }
+    test_mm256_testz_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testc_ps() {
+        let a = _mm256_set1_ps(1.);
+        let r = _mm256_testc_ps(a, a);
+        assert_eq!(r, 1);
+        let b = _mm256_set1_ps(-1.);
+        let r = _mm256_testc_ps(a, b);
+        assert_eq!(r, 0);
+    }
+    test_mm256_testc_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm256_testnzc_ps() {
+        let a = _mm256_set1_ps(1.);
+        let r = _mm256_testnzc_ps(a, a);
+        assert_eq!(r, 0);
+        let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
+        let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
+        let r = _mm256_testnzc_ps(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm256_testnzc_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testz_ps() {
+        let a = _mm_set1_ps(1.);
+        let r = _mm_testz_ps(a, a);
+        assert_eq!(r, 1);
+        let a = _mm_set1_ps(-1.);
+        let r = _mm_testz_ps(a, a);
+        assert_eq!(r, 0);
+    }
+    test_mm_testz_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testc_ps() {
+        let a = _mm_set1_ps(1.);
+        let r = _mm_testc_ps(a, a);
+        assert_eq!(r, 1);
+        let b = _mm_set1_ps(-1.);
+        let r = _mm_testc_ps(a, b);
+        assert_eq!(r, 0);
+    }
+    test_mm_testc_ps();
+
+    #[target_feature(enable = "avx")]
+    unsafe fn test_mm_testnzc_ps() {
+        let a = _mm_set1_ps(1.);
+        let r = _mm_testnzc_ps(a, a);
+        assert_eq!(r, 0);
+        let a = _mm_setr_ps(1., -1., -1., -1.);
+        let b = _mm_setr_ps(-1., -1., 1., 1.);
+        let r = _mm_testnzc_ps(a, b);
+        assert_eq!(r, 1);
+    }
+    test_mm_testnzc_ps();
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
+    _mm_set_epi64x(b, a)
+}
+
+#[track_caller]
+#[target_feature(enable = "sse")]
+unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+    let r = _mm_cmpeq_ps(a, b);
+    if _mm_movemask_ps(r) != 0b1111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
+    if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+}
+
+#[track_caller]
+#[target_feature(enable = "avx")]
+unsafe fn assert_eq_m256(a: __m256, b: __m256) {
+    let cmp = _mm256_cmp_ps::<_CMP_EQ_OQ>(a, b);
+    if _mm256_movemask_ps(cmp) != 0b11111111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "avx")]
+unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) {
+    let cmp = _mm256_cmp_pd::<_CMP_EQ_OQ>(a, b);
+    if _mm256_movemask_pd(cmp) != 0b1111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "avx")]
+unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
+    assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
+}
+
+/// Stores `T` in an unaligned address
+struct Unaligned<T: Copy> {
+    buf: Vec<u8>,
+    offset: bool,
+    _marker: std::marker::PhantomData<T>,
+}
+
+impl<T: Copy> Unaligned<T> {
+    fn new(value: T) -> Self {
+        // Allocate extra byte for unalignment headroom
+        let len = std::mem::size_of::<T>();
+        let mut buf = Vec::<u8>::with_capacity(len + 1);
+        // Force the address to be a non-multiple of 2, so it is as unaligned as it can get.
+        let offset = (buf.as_ptr() as usize % 2) == 0;
+        let value_ptr: *const T = &value;
+        unsafe {
+            buf.as_mut_ptr().add(offset.into()).copy_from_nonoverlapping(value_ptr.cast(), len);
+        }
+        Self { buf, offset, _marker: std::marker::PhantomData }
+    }
+
+    fn as_ptr(&self) -> *const T {
+        unsafe { self.buf.as_ptr().add(self.offset.into()).cast() }
+    }
+
+    fn as_mut_ptr(&mut self) -> *mut T {
+        unsafe { self.buf.as_mut_ptr().add(self.offset.into()).cast() }
+    }
+
+    fn read(&self) -> T {
+        unsafe { self.as_ptr().read_unaligned() }
+    }
 }

From 63240d758b0a6d48a8ea24799230ae2c47540fdd Mon Sep 17 00:00:00 2001
From: The Miri Conjob Bot <miri@cron.bot>
Date: Sat, 17 Feb 2024 05:10:27 +0000
Subject: [PATCH 29/30] Preparing for merge from rustc

---
 src/tools/miri/rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 2115e482c64a1..ab6f899cd3a5e 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-0f806a9812b62c36bdab08d33c14cf2d3ecf4355
+4316d0c6252cb1f833e582dfa68adb98efd5ddfb

From fd5a84b529059203665910a118289aade8101602 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Sat, 17 Feb 2024 13:02:04 +0100
Subject: [PATCH 30/30] update lockfile

---
 Cargo.lock | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f9ad78e37951d..1963f7c0d5662 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2524,14 +2524,12 @@ dependencies = [
  "aes",
  "colored",
  "ctrlc",
- "env_logger 0.10.2",
  "getrandom",
  "jemalloc-sys",
  "lazy_static",
  "libc",
  "libffi",
  "libloading",
- "log",
  "measureme",
  "rand",
  "regex",