@@ -725,13 +725,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
725
725
llvm:: LLVMSetVolatile ( store, llvm:: True ) ;
726
726
}
727
727
if flags. contains ( MemFlags :: NONTEMPORAL ) {
728
- // According to LLVM [1] building a nontemporal store must
729
- // *always* point to a metadata value of the integer 1.
730
- //
731
- // [1]: https://llvm.org/docs/LangRef.html#store-instruction
732
- let one = self . cx . const_i32 ( 1 ) ;
733
- let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
734
- llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
728
+ // Make sure that the current target architecture supports "sane" non-temporal
729
+ // stores, i.e., non-temporal stores that are equivalent to regular stores except
730
+ // for performance. LLVM doesn't seem to care about this, and will happily treat
731
+ // `!nontemporal` stores as-if they were normal stores (for reordering optimizations
732
+ // etc) even on x86, despite later lowering them to MOVNT instructions, which do *not* behave like
733
+ // regular stores but require special fences.
734
+ // So we keep a list of architectures where `!nontemporal` is known to be truly just
735
+ // a hint, and use regular stores everywhere else.
736
+ // (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
737
+ // before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
738
+ const WELL_BEHAVED_NONTEMPORAL_ARCHS : & [ & str ] = & [ "aarch64" , "arm" ] ;
739
+
740
+ let use_nontemporal =
741
+ WELL_BEHAVED_NONTEMPORAL_ARCHS . contains ( & & * self . cx . tcx . sess . target . arch ) ;
742
+ if use_nontemporal {
743
+ // According to LLVM [1] building a nontemporal store must
744
+ // *always* point to a metadata value of the integer 1.
745
+ //
746
+ // [1]: https://llvm.org/docs/LangRef.html#store-instruction
747
+ let one = self . cx . const_i32 ( 1 ) ;
748
+ let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
749
+ llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
750
+ }
735
751
}
736
752
store
737
753
}
0 commit comments