From dc9228d62e231fc7dcad09c677379958c2421161 Mon Sep 17 00:00:00 2001
From: Jubilee Young
Date: Wed, 9 Aug 2023 06:03:41 -0700
Subject: [PATCH] Document movnt needs sfence

For every intrinsic that may generate any of the MOVNT family of
instructions, specify that it must be followed by `_mm_sfence`.

Also, ask people to not think too hard about what actually happens with
write-combining memory buffers. They probably don't want to know, and in
terms of the Rust abstract machine, we aren't actually entirely sure yet.
---
 crates/core_arch/src/x86/avx.rs     | 33 +++++++++++++++++++++++++++++++++
 crates/core_arch/src/x86/avx512f.rs | 33 +++++++++++++++++++++++++++++++++
 crates/core_arch/src/x86/sse2.rs    | 33 +++++++++++++++++++++++++++++++++
 crates/core_arch/src/x86/sse4a.rs   | 24 ++++++++++++++++++++++++
 crates/core_arch/src/x86_64/sse2.rs | 10 ++++++++++
 5 files changed, 133 insertions(+)

diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index c4f76cf092..bc647fff0b 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -1683,6 +1683,17 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
 /// aligned memory location. To minimize caching, the data is flagged as
 /// non-temporal (unlikely to be used again soon)
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1696,6 +1707,17 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
 /// to a 32-byte aligned memory location. To minimize caching, the data is
 /// flagged as non-temporal (unlikely to be used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1711,6 +1733,17 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
 /// caching, the data is flagged as non-temporal (unlikely to be used again
 /// soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
 #[inline]
 #[target_feature(enable = "avx")]
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index a5c9d6f693..74c1351770 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -26144,6 +26144,17 @@ pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) ->
 
 /// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
 #[inline]
 #[target_feature(enable = "avx512f")]
@@ -26155,6 +26166,17 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
 
 /// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
 #[inline]
 #[target_feature(enable = "avx512f")]
@@ -26166,6 +26188,17 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
 
 /// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
 #[inline]
 #[target_feature(enable = "avx512f")]
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index f4fdb50469..b4e00d65b4 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -1277,6 +1277,17 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
 #[inline]
 #[target_feature(enable = "sse2")]
@@ -1290,6 +1301,17 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
 #[inline]
 #[target_feature(enable = "sse2")]
@@ -2469,6 +2491,17 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
 #[inline]
 #[target_feature(enable = "sse2")]
diff --git a/crates/core_arch/src/x86/sse4a.rs b/crates/core_arch/src/x86/sse4a.rs
index 976c907cb2..6df295d0a2 100644
--- a/crates/core_arch/src/x86/sse4a.rs
+++ b/crates/core_arch/src/x86/sse4a.rs
@@ -62,6 +62,18 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
 /// Non-temporal store of `a.0` into `p`.
 ///
 /// Writes 64-bit data to a memory location without polluting the caches.
+///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 #[inline]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntsd))]
@@ -73,6 +85,18 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
 /// Non-temporal store of `a.0` into `p`.
 ///
 /// Writes 32-bit data to a memory location without polluting the caches.
+///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, but before
+/// the use of `_mm_sfence()`, is discouraged. Such accesses can lead to
+/// pipeline stalls and as-yet-unspecified program behavior.
+///
 #[inline]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntss))]
diff --git a/crates/core_arch/src/x86_64/sse2.rs b/crates/core_arch/src/x86_64/sse2.rs
index bf2394ebab..e1534813ac 100644
--- a/crates/core_arch/src/x86_64/sse2.rs
+++ b/crates/core_arch/src/x86_64/sse2.rs
@@ -66,6 +66,16 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading from or writing to the stored-to memory by any other means, after
+/// any nontemporal store has been used to write to that memory, is discouraged.
+/// Doing so can lead to pipeline stalls and as-yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64)
 #[inline]
 #[target_feature(enable = "sse2")]
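
To illustrate the pattern this patch documents, here is a minimal sketch of a
safe wrapper; it is not part of the diff, and the function `stream_fill` and
its signature are made up for the example. The non-temporal stores happen
inside an `unsafe` block, and the function ends with `_mm_sfence()` before any
safe caller can touch the memory again.

use core::arch::x86_64::{__m128i, _mm_set1_epi8, _mm_sfence, _mm_stream_si128};

/// Hypothetical safe wrapper: fill `dst` with `byte` using non-temporal
/// (MOVNTDQ) stores, ending with `_mm_sfence()` as the new docs require.
fn stream_fill(dst: &mut [__m128i], byte: i8) {
    // SAFETY: SSE2 is part of the x86_64 baseline, every `&mut __m128i` is a
    // valid 16-byte-aligned destination, and `_mm_sfence()` runs before any
    // safe code can observe the stored-to memory again.
    unsafe {
        let value = _mm_set1_epi8(byte);
        for slot in dst.iter_mut() {
            // Non-temporal store: bypasses the cache hierarchy.
            _mm_stream_si128(slot, value);
        }
        // Order the non-temporal stores before returning to safe code.
        _mm_sfence();
    }
}

Because SSE2 is always available on x86_64, no runtime detection is needed
here; an AVX or AVX-512 variant of the same sketch would additionally gate the
unsafe block on `is_x86_feature_detected!`.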