6 changes: 2 additions & 4 deletions compiler/rustc_data_structures/src/fx.rs
@@ -1,11 +1,9 @@
-use std::hash::BuildHasherDefault;
-
 pub use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet, FxHasher};
 
 pub type StdEntry<'a, K, V> = std::collections::hash_map::Entry<'a, K, V>;
 
-pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, BuildHasherDefault<FxHasher>>;
-pub type FxIndexSet<V> = indexmap::IndexSet<V, BuildHasherDefault<FxHasher>>;
+pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, FxBuildHasher>;
+pub type FxIndexSet<V> = indexmap::IndexSet<V, FxBuildHasher>;
 pub type IndexEntry<'a, K, V> = indexmap::map::Entry<'a, K, V>;
 pub type IndexOccupiedEntry<'a, K, V> = indexmap::map::OccupiedEntry<'a, K, V>;
 
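The alias switch here is type-level only: `rustc_hash` defines `FxBuildHasher` as a zero-sized `BuildHasher` that hands out a default `FxHasher`, exactly like `BuildHasherDefault<FxHasher>`, so no hashing behavior changes. A minimal sketch outside the compiler, assuming the `rustc-hash` (2.x) and `indexmap` crates as dependencies:

```rust
use std::hash::{BuildHasher, BuildHasherDefault};

use indexmap::IndexMap;
use rustc_hash::{FxBuildHasher, FxHasher};

// Same shape as the alias in fx.rs, reconstructed as an external sketch.
type FxIndexMap<K, V> = IndexMap<K, V, FxBuildHasher>;

fn main() {
    // Both builders produce the same default `FxHasher`, so identical keys
    // hash to identical values under either type parameter.
    let a = FxBuildHasher.hash_one("key");
    let b = BuildHasherDefault::<FxHasher>::default().hash_one("key");
    assert_eq!(a, b);

    let mut map: FxIndexMap<&str, u32> = FxIndexMap::default();
    map.insert("key", 1);
    assert_eq!(map.get("key"), Some(&1));
}
```
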
2 changes: 1 addition & 1 deletion compiler/rustc_data_structures/src/unord.rs
@@ -8,10 +8,10 @@ use std::hash::Hash;
 use std::iter::{Product, Sum};
 use std::ops::Index;
 
+use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
 use rustc_macros::{Decodable_NoContext, Encodable_NoContext};
 
 use crate::fingerprint::Fingerprint;
-use crate::fx::{FxBuildHasher, FxHashMap, FxHashSet};
 use crate::stable_hasher::{HashStable, StableCompare, StableHasher, ToStableHashKey};
 
 /// `UnordItems` is the order-less version of `Iterator`. It only contains methods
8 changes: 3 additions & 5 deletions compiler/rustc_type_ir/src/data_structures/mod.rs
@@ -1,11 +1,9 @@
-use std::hash::BuildHasherDefault;
-
 pub use ena::unify::{NoError, UnifyKey, UnifyValue};
-use rustc_hash::FxHasher;
+use rustc_hash::FxBuildHasher;
 pub use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
 
-pub type IndexMap<K, V> = indexmap::IndexMap<K, V, BuildHasherDefault<FxHasher>>;
-pub type IndexSet<V> = indexmap::IndexSet<V, BuildHasherDefault<FxHasher>>;
+pub type IndexMap<K, V> = indexmap::IndexMap<K, V, FxBuildHasher>;
+pub type IndexSet<V> = indexmap::IndexSet<V, FxBuildHasher>;
 
 mod delayed_map;
 
6 changes: 2 additions & 4 deletions library/core/src/pin.rs
@@ -831,15 +831,13 @@
 //! <code>fn get_pin_mut(self: [Pin]<[`&mut Self`]>) -> [Pin]<[`&mut T`]></code>.
 //! Then we could do the following:
 //! ```compile_fail
 //! # use std::cell::RefCell;
 //! # use std::pin::Pin;
-//! fn exploit_ref_cell<T>(rc: Pin<&mut RefCell<T>>) {
+//! fn exploit_ref_cell<T>(mut rc: Pin<&mut RefCell<T>>) {
 //!     // Here we get pinned access to the `T`.
 //!     let _: Pin<&mut T> = rc.as_mut().get_pin_mut();
 //!
 //!     // And here we have `&mut T` to the same data.
 //!     let shared: &RefCell<T> = rc.into_ref().get_ref();
-//!     let borrow = shared.borrow_mut();
+//!     let mut borrow = shared.borrow_mut();
 //!     let content = &mut *borrow;
 //! }
 //! ```
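The two added `mut`s matter because this `compile_fail` example should fail only at the hypothetical `get_pin_mut` call: `Pin::as_mut` takes `&mut self`, and `&mut *borrow` needs a mutable `RefMut` binding, so without them the doctest would fail for incidental reasons. A compiling sketch of just the binding mechanics, using only real `Pin` methods on a plain `i32`:

```rust
use std::pin::Pin;

fn bindings_demo(mut p: Pin<&mut i32>) {
    // `Pin::as_mut` takes `&mut self`, so `p` must be a `mut` binding
    // to reborrow the pinned reference without consuming it.
    let _: Pin<&mut i32> = p.as_mut();

    // `into_ref` then consumes the `Pin<&mut i32>` and downgrades it.
    let shared: Pin<&i32> = p.into_ref();
    let _: &i32 = shared.get_ref();
}

fn main() {
    let mut value = 5;
    bindings_demo(Pin::new(&mut value));
}
```
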
62 changes: 22 additions & 40 deletions library/core/src/slice/ascii.rs
@@ -460,56 +460,38 @@ const fn is_ascii(s: &[u8]) -> bool {
     )
 }
 
-/// Chunk size for vectorized ASCII checking (two 16-byte SSE registers).
+/// Chunk size for SSE2 vectorized ASCII checking (4x 16-byte loads).
 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-const CHUNK_SIZE: usize = 32;
+const SSE2_CHUNK_SIZE: usize = 64;
 
 /// SSE2 implementation using `_mm_movemask_epi8` (compiles to `pmovmskb`) to
 /// avoid LLVM's broken AVX-512 auto-vectorization of counting loops.
 ///
 /// FIXME(llvm#176906): Remove this workaround once LLVM generates efficient code.
 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
 #[inline]
 fn is_ascii_sse2(bytes: &[u8]) -> bool {
     use crate::arch::x86_64::{__m128i, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128};
 
-    let mut i = 0;
-
-    while i + CHUNK_SIZE <= bytes.len() {
-        // SAFETY: We have verified that `i + CHUNK_SIZE <= bytes.len()`.
-        let ptr = unsafe { bytes.as_ptr().add(i) };
-
-        // Load two 16-byte chunks and combine them.
-        // SAFETY: We verified `i + 32 <= len`, so ptr is valid for 32 bytes.
-        // `_mm_loadu_si128` allows unaligned loads.
-        let chunk1 = unsafe { _mm_loadu_si128(ptr as *const __m128i) };
-        // SAFETY: Same as above - ptr.add(16) is within the valid 32-byte range.
-        let chunk2 = unsafe { _mm_loadu_si128(ptr.add(16) as *const __m128i) };
-
-        // OR them together - if any byte has the high bit set, the result will too.
-        // SAFETY: SSE2 is guaranteed by the cfg predicate.
-        let combined = unsafe { _mm_or_si128(chunk1, chunk2) };
-
-        // Create a mask from the MSBs of each byte.
-        // If any byte is >= 128, its MSB is 1, so the mask will be non-zero.
-        // SAFETY: SSE2 is guaranteed by the cfg predicate.
-        let mask = unsafe { _mm_movemask_epi8(combined) };
-
+    let (chunks, rest) = bytes.as_chunks::<SSE2_CHUNK_SIZE>();
+
+    for chunk in chunks {
+        let ptr = chunk.as_ptr();
+        // SAFETY: chunk is 64 bytes. SSE2 is baseline on x86_64.
+        let mask = unsafe {
+            let a1 = _mm_loadu_si128(ptr as *const __m128i);
+            let a2 = _mm_loadu_si128(ptr.add(16) as *const __m128i);
+            let b1 = _mm_loadu_si128(ptr.add(32) as *const __m128i);
+            let b2 = _mm_loadu_si128(ptr.add(48) as *const __m128i);
+            // OR all chunks - if any byte has high bit set, combined will too.
+            let combined = _mm_or_si128(_mm_or_si128(a1, a2), _mm_or_si128(b1, b2));
+            // Create a mask from the MSBs of each byte.
+            // If any byte is >= 128, its MSB is 1, so the mask will be non-zero.
+            _mm_movemask_epi8(combined)
+        };
         if mask != 0 {
             return false;
         }
-
-        i += CHUNK_SIZE;
     }
 
-    // Handle remaining bytes with simple loop
-    while i < bytes.len() {
-        if !bytes[i].is_ascii() {
-            return false;
-        }
-        i += 1;
-    }
-
-    true
+    // Handle remaining bytes
+    rest.iter().all(|b| b.is_ascii())
 }
 
 /// ASCII test optimized to use the `pmovmskb` instruction on `x86-64`.
@@ -529,7 +511,7 @@ const fn is_ascii(bytes: &[u8]) -> bool {
         is_ascii_simple(bytes)
     } else {
         // For small inputs, use usize-at-a-time processing to avoid SSE2 call overhead.
-        if bytes.len() < CHUNK_SIZE {
+        if bytes.len() < SSE2_CHUNK_SIZE {
            let chunks = bytes.chunks_exact(USIZE_SIZE);
            let remainder = chunks.remainder();
            for chunk in chunks {
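Both paths in this file rest on the same observation: a byte is non-ASCII exactly when its top bit is set, so you can OR bytes together and test the `0x80` lanes once at the end. A portable sketch of the usize-at-a-time variant from the second hunk (not the library's exact code, which uses the `USIZE_SIZE` machinery shown above):

```rust
fn is_ascii_swar(bytes: &[u8]) -> bool {
    const WORD: usize = core::mem::size_of::<usize>();
    // Every byte lane's high bit set: 0x8080...80.
    const HIGH_BITS: usize = usize::from_ne_bytes([0x80; WORD]);

    let mut chunks = bytes.chunks_exact(WORD);
    let mut acc: usize = 0;
    for chunk in &mut chunks {
        // OR-accumulate: any non-ASCII byte leaves a high bit set somewhere.
        acc |= usize::from_ne_bytes(chunk.try_into().unwrap());
    }
    (acc & HIGH_BITS) == 0 && chunks.remainder().iter().all(|b| b.is_ascii())
}

fn main() {
    assert!(is_ascii_swar(b"chunked ascii input, any length"));
    assert!(!is_ascii_swar("caf\u{e9}".as_bytes()));
}
```

The SSE2 version in the diff is the same trick widened to 64 bytes per iteration, with `_mm_movemask_epi8` gathering the high bits into one integer so a single comparison covers the whole chunk.
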
2 changes: 1 addition & 1 deletion src/doc/rustc/src/platform-support/wasm32-wasip1.md
@@ -20,7 +20,7 @@ focused on the Component Model-based definition of WASI. At this point the
 `wasm32-wasip1` Rust target is intended for historical compatibility with
 [WASIp1] set of syscalls.
 
-[WASIp1]: https://github.com/WebAssembly/WASI/tree/main/legacy/preview1
+[WASIp1]: https://github.com/WebAssembly/WASI/tree/wasi-0.1/preview1
 [Component Model]: https://github.com/webassembly/component-model
 
 Today the `wasm32-wasip1` target will generate core WebAssembly modules
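For anyone trying the target the doc describes, a minimal smoke test (assuming a rustup toolchain with the `wasm32-wasip1` standard library installed):

```rust
// Build and run with, e.g.:
//   rustup target add wasm32-wasip1
//   rustc --target wasm32-wasip1 hello.rs
// The output is a core WebAssembly module usable with a WASIp1 runtime.
fn main() {
    println!("hello from WASIp1");
}
```
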
6 changes: 3 additions & 3 deletions tests/assembly-llvm/slice-is-ascii.rs
@@ -13,15 +13,15 @@
 /// Verify `is_ascii` generates efficient code on different architectures:
 ///
 /// - x86_64: Must NOT use `kshiftrd`/`kshiftrq` (broken AVX-512 auto-vectorization).
-///   The fix uses explicit SSE2 intrinsics (`pmovmskb`/`vpmovmskb`).
-///   See: https://github.com/llvm/llvm-project/issues/176906
+///   Good version uses explicit SSE2 intrinsics (`pmovmskb`/`vpmovmskb`).
 ///
 /// - loongarch64: Should use `vmskltz.b` instruction for the fast-path.
+///   This architecture still relies on LLVM auto-vectorization.
 
 // X86_64-LABEL: test_is_ascii
 // X86_64-NOT: kshiftrd
 // X86_64-NOT: kshiftrq
+// X86_64: {{vpor|por}}
 // X86_64: {{vpmovmskb|pmovmskb}}
 
 // LA64-LABEL: test_is_ascii
 // LA64: vmskltz.b
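For context, tests under `tests/assembly-llvm/` compile a small wrapper and run FileCheck over the emitted assembly; the `X86_64:`/`LA64:` prefixes select per-target revisions. A hypothetical minimal shape for such a test (the real file's header directives and wrapper may differ):

```rust
//@ assembly-output: emit-asm
//@ compile-flags: -Copt-level=3
//@ revisions: X86_64 LA64

#![crate_type = "lib"]

// FileCheck matches the directives above against this function's assembly;
// `#[no_mangle]` keeps the symbol name stable for the LABEL checks.
#[no_mangle]
pub fn test_is_ascii(bytes: &[u8]) -> bool {
    bytes.is_ascii()
}
```
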