diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index b21c559e6686c..6ad3eb92e88b5 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>( // Cranelift treats stores as volatile by default // FIXME correctly handle unaligned_volatile_store - // FIXME actually do nontemporal stores if requested + // FIXME actually do nontemporal stores if requested (but see the LLVM backend for which + // architectures even allow this in a sane way) let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout()); dest.write_cvalue(fx, val); } diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index b9e4bd79fe1e6..e9e777e72f629 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -1128,6 +1128,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { self.llbb().add_assignment(self.location, aligned_destination, val); // TODO(antoyo): handle align and flags. // NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here? + // When adding support for NONTEMPORAL, see see the LLVM backend for which + // architectures even allow this in a sane way. self.cx.context.new_rvalue_zero(self.type_i32()) } diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 72ff9ea118e28..4e41e73407af9 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -725,13 +725,27 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { llvm::LLVMSetVolatile(store, llvm::True); } if flags.contains(MemFlags::NONTEMPORAL) { - // According to LLVM [1] building a nontemporal store must - // *always* point to a metadata value of the integer 1. - // - // [1]: https://llvm.org/docs/LangRef.html#store-instruction - let one = self.cx.const_i32(1); - let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1); - llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node); + // Make sure that the current target architectures supports "sane" non-temporal + // stores, i.e., non-temporal stores that are equivalent to regular stores except + // for performance. LLVM doesn't seem to care about this, and will happily treat + // `!nontemporal` stores as-if they were normal stores (for reordering optimizations + // etc) even on x86, despite later lowering them to MOVNT which do *not* behave like + // regular stores but require special fences. + // So we keep a list of architectures where `!nontemporal` is known to be truly just + // a hint, and use regular stores everywhere else. + const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] = &["aarch64", "arm"]; + + let use_nontemporal = + WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch); + if use_nontemporal { + // According to LLVM [1] building a nontemporal store must + // *always* point to a metadata value of the integer 1. + // + // [1]: https://llvm.org/docs/LangRef.html#store-instruction + let one = self.cx.const_i32(1); + let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1); + llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node); + } } store } diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index c4c6388338949..7bf6b030564a9 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -2405,12 +2405,12 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32; - /// Emits a `!nontemporal` store according to LLVM (see their docs). - /// Probably will never become stable. + /// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held + /// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`. /// - /// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model! - /// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic. - /// See for some more discussion. + /// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT` + /// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered + /// in ways that are not allowed for regular writes). #[rustc_nounwind] pub fn nontemporal_store(ptr: *mut T, val: T);