diff --git a/Cargo.lock b/Cargo.lock index f9cd606dd1d52..c2932a0f0bbe1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1586,6 +1586,7 @@ version = "0.70.0" dependencies = [ "oxc_allocator", "oxc_ast", + "oxc_data_structures", "oxc_span", "oxc_syntax", ] @@ -1695,6 +1696,7 @@ name = "oxc_data_structures" version = "0.70.0" dependencies = [ "ropey", + "rustversion", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1306a2851b596..558f72f5d2939 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -212,6 +212,7 @@ project-root = "0.2.2" rayon = "1.10.0" ropey = "1.6.1" rust-lapper = "1.1.0" +rustversion = "1.0.20" ryu-js = "1.0.2" saphyr = "0.0.4" schemars = "0.8.22" diff --git a/crates/oxc_ast_visit/Cargo.toml b/crates/oxc_ast_visit/Cargo.toml index 1b9e3731615e3..33cc2b31b573c 100644 --- a/crates/oxc_ast_visit/Cargo.toml +++ b/crates/oxc_ast_visit/Cargo.toml @@ -21,6 +21,7 @@ doctest = false [dependencies] oxc_allocator = { workspace = true } oxc_ast = { workspace = true } +oxc_data_structures = { workspace = true, features = ["pointer_ext"], optional = true } oxc_span = { workspace = true } oxc_syntax = { workspace = true } @@ -29,6 +30,7 @@ default = [] serialize = [ "oxc_allocator/serialize", "oxc_ast/serialize", + "oxc_data_structures", "oxc_span/serialize", "oxc_syntax/serialize", ] diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16.rs b/crates/oxc_ast_visit/src/utf8_to_utf16.rs index 40f2ecd3f5f62..be054620a5fba 100644 --- a/crates/oxc_ast_visit/src/utf8_to_utf16.rs +++ b/crates/oxc_ast_visit/src/utf8_to_utf16.rs @@ -3,6 +3,7 @@ use std::{cmp::min, slice}; use oxc_ast::ast::{Comment, Program}; +use oxc_data_structures::pointer_ext::PointerExt; use oxc_span::Span; use oxc_syntax::module_record::{ModuleRecord, VisitMutModuleRecord}; @@ -551,7 +552,7 @@ fn build_translations(source_text: &str, translations: &mut Vec) { if chunk.contains_unicode() { // SAFETY: `ptr` is equal to or after `start_ptr`. Both are within bounds of `bytes`. // `ptr` is derived from `start_ptr`. - let offset = unsafe { offset_from(ptr, start_ptr) }; + let offset = unsafe { ptr.offset_from_usize(start_ptr) }; process_slice(chunk.as_slice(), offset); } @@ -572,29 +573,11 @@ fn build_translations(source_text: &str, translations: &mut Vec) { let last_chunk = unsafe { slice::from_raw_parts(ptr, remaining_len) }; // SAFETY: `ptr` is after `start_ptr`. Both are within bounds of `bytes`. // `ptr` is derived from `start_ptr`. - let offset = unsafe { offset_from(ptr, start_ptr) }; + let offset = unsafe { ptr.offset_from_usize(start_ptr) }; process_slice(last_chunk, offset); } } -/// Calculate distance in bytes from `from_ptr` to `to_ptr`. -/// -/// # SAFETY -/// * `from_ptr` must be before or equal to `to_ptr`. -/// * Both pointers must point to within the same object (or the end of the object). -/// * Both pointers must be derived from the same original pointer. -#[inline] -unsafe fn offset_from(to_ptr: *const u8, from_ptr: *const u8) -> usize { - debug_assert!(to_ptr as usize >= from_ptr as usize); - - // SAFETY: Caller `from_ptr` and `to_ptr` are both derived from same original pointer, - // and in bounds of same object. - // Both pointers are `*const u8`, so alignment and stride requirements are not relevant. - let offset = unsafe { to_ptr.offset_from(from_ptr) }; - // SAFETY: Caller guarantees `from_ptr` is before or equal to `to_ptr`, so `offset >= 0` - unsafe { usize::try_from(offset).unwrap_unchecked() } -} - #[cfg(test)] mod test { use oxc_allocator::Allocator; diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml index c0244dec7343a..2da30676aea31 100644 --- a/crates/oxc_codegen/Cargo.toml +++ b/crates/oxc_codegen/Cargo.toml @@ -22,7 +22,7 @@ doctest = true [dependencies] oxc_allocator = { workspace = true } oxc_ast = { workspace = true } -oxc_data_structures = { workspace = true, features = ["code_buffer", "stack"] } +oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "stack"] } oxc_index = { workspace = true } oxc_semantic = { workspace = true } oxc_sourcemap = { workspace = true } diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs index 04fb474a24585..76c32b0fffcf6 100644 --- a/crates/oxc_codegen/src/str.rs +++ b/crates/oxc_codegen/src/str.rs @@ -1,7 +1,7 @@ use std::slice; use oxc_ast::ast::StringLiteral; -use oxc_data_structures::assert_unchecked; +use oxc_data_structures::{assert_unchecked, pointer_ext::PointerExt}; use oxc_syntax::identifier::{LS, NBSP, PS}; use crate::Codegen; @@ -197,8 +197,7 @@ impl PrintStringState<'_> { // and the iterator only advances, so current position of `bytes` must be on or after `chunk_start` let len = unsafe { let bytes_ptr = self.bytes.as_slice().as_ptr(); - let offset = bytes_ptr.offset_from(self.chunk_start); - usize::try_from(offset).unwrap_unchecked() + bytes_ptr.offset_from_usize(self.chunk_start) }; // SAFETY: `chunk_start` is within bounds of original `&str`. diff --git a/crates/oxc_data_structures/Cargo.toml b/crates/oxc_data_structures/Cargo.toml index 65df36287056c..b57cd347196ef 100644 --- a/crates/oxc_data_structures/Cargo.toml +++ b/crates/oxc_data_structures/Cargo.toml @@ -23,12 +23,14 @@ doctest = true [dependencies] ropey = { workspace = true, optional = true } +rustversion = { workspace = true, optional = true } [features] default = [] -all = ["assert_unchecked", "code_buffer", "inline_string", "rope", "stack"] +all = ["assert_unchecked", "code_buffer", "inline_string", "pointer_ext", "rope", "stack"] assert_unchecked = [] code_buffer = ["assert_unchecked"] inline_string = ["assert_unchecked"] +pointer_ext = ["dep:rustversion"] rope = ["dep:ropey"] -stack = ["assert_unchecked"] +stack = ["pointer_ext"] diff --git a/crates/oxc_data_structures/src/lib.rs b/crates/oxc_data_structures/src/lib.rs index 76309e61864db..0fa297c0506c3 100644 --- a/crates/oxc_data_structures/src/lib.rs +++ b/crates/oxc_data_structures/src/lib.rs @@ -11,6 +11,9 @@ pub mod code_buffer; #[cfg(feature = "inline_string")] pub mod inline_string; +#[cfg(feature = "pointer_ext")] +pub mod pointer_ext; + #[cfg(feature = "rope")] pub mod rope; diff --git a/crates/oxc_data_structures/src/pointer_ext.rs b/crates/oxc_data_structures/src/pointer_ext.rs new file mode 100644 index 0000000000000..b3f398f3cb725 --- /dev/null +++ b/crates/oxc_data_structures/src/pointer_ext.rs @@ -0,0 +1,169 @@ +//! Extension trait for pointers. See [`PointerExt`]. + +// TODO: Once our MSRV reaches v1.87.0, remove this trait and just use `offset_from_unsigned` directly. +// `#[expect(clippy::incompatible_msrv)]` below will trigger a warning when MSRV is bumped to 1.87.0. + +#![expect(clippy::inline_always)] + +use std::ptr::NonNull; + +/// Extension trait for pointers. +/// +/// Rust v1.87.0 introduced `offset_from_unsigned` and `byte_offset_from_unsigned` methods for pointers. +/// +/// +/// These are implemented as intrinsics, and potentially gives the compiler more information +/// with which to make optimizations, compared to either: +/// +/// * `end.offset_from(start) as usize` +/// * `usize::try_from(end.offset_from(start)).unwrap_unchecked()` +/// +/// We want to use these methods, but they're not available on our current MSRV. +/// +/// This trait provides alternatives `offset_from_usize` and `byte_offset_from_usize`. +/// +/// * On Rust v1.87.0+, they use Rust's native methods. +/// * On earlier versions of Rust, they use a fallback. +#[expect(private_bounds)] +pub trait PointerExt: PointerExtImpl { + /// Calculates the distance between two pointers within the same allocation, + /// *where it's known that `self` is equal to or greater than `origin`*. + /// The returned value is in units of `T`: the distance in bytes is divided by `size_of::()`. + /// + /// # SAFETY + /// + /// * The distance between the pointers must be non-negative (`self >= origin`). + /// + /// * *All* the safety conditions of `offset_from` apply to this method as well; + /// see it for the full details. + /// + /// See + /// for full details. + #[inline(always)] + unsafe fn offset_from_usize(self, origin: Self) -> usize { + // SAFETY: Same constraints as this method + unsafe { self.offset_from_usize_impl(origin) } + } + + /// Calculates the distance between two pointers within the same allocation, + /// *where it's known that `self` is equal to or greater than `origin`*. + /// The returned value is in units of **bytes**. + /// + /// # SAFETY + /// + /// * The distance between the pointers must be non-negative (`self >= origin`). + /// + /// * *All* the safety conditions of `offset_from` apply to this method as well; + /// see it for the full details. + /// + /// See + /// for full details. + unsafe fn byte_offset_from_usize(self, origin: Self) -> usize { + // SAFETY: Same constraints as this method + unsafe { self.byte_offset_from_usize_impl(origin) } + } +} + +impl PointerExt for *const T {} + +impl PointerExt for *mut T {} + +impl PointerExt for NonNull {} + +/// Trait that does the actual work. +/// +/// This trait is not `pub`, to prevent [`PointerExt`] being implemented on other types +/// outside this module. +/// +/// The other purpose of this trait is to avoid repeating the docs for the methods 12 times. +trait PointerExtImpl: Sized { + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize; + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize; +} + +/// Native version - just delegates to Rust's methods. +#[rustversion::since(1.87.0)] +#[expect(clippy::incompatible_msrv, clippy::undocumented_unsafe_blocks)] +const _: () = { + impl PointerExtImpl for *const T { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.offset_from_unsigned(origin) } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.byte_offset_from_unsigned(origin) } + } + } + + impl PointerExtImpl for *mut T { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.offset_from_unsigned(origin) } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.byte_offset_from_unsigned(origin) } + } + } + + impl PointerExtImpl for NonNull { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.offset_from_unsigned(origin) } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + unsafe { self.byte_offset_from_unsigned(origin) } + } + } +}; + +/// Fallback version. This is the best we can do prior to Rust v1.87.0. +#[rustversion::before(1.87.0)] +const _: () = { + impl PointerExtImpl for *const T { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `offset_from_unsigned` method + unsafe { usize::try_from(self.offset_from(origin)).unwrap_unchecked() } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `byte_offset_from_unsigned` method + unsafe { usize::try_from(self.byte_offset_from(origin)).unwrap_unchecked() } + } + } + + impl PointerExtImpl for *mut T { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `offset_from_unsigned` method + unsafe { usize::try_from(self.offset_from(origin)).unwrap_unchecked() } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `byte_offset_from_unsigned` method + unsafe { usize::try_from(self.byte_offset_from(origin)).unwrap_unchecked() } + } + } + + impl PointerExtImpl for NonNull { + #[inline(always)] + unsafe fn offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `offset_from_unsigned` method + unsafe { usize::try_from(self.offset_from(origin)).unwrap_unchecked() } + } + + #[inline(always)] + unsafe fn byte_offset_from_usize_impl(self, origin: Self) -> usize { + // SAFETY: Has same safety requirements as native `byte_offset_from_unsigned` method + unsafe { usize::try_from(self.byte_offset_from(origin)).unwrap_unchecked() } + } + } +}; diff --git a/crates/oxc_data_structures/src/stack/common.rs b/crates/oxc_data_structures/src/stack/common.rs index 005403a6f42eb..85ed2650f71d6 100644 --- a/crates/oxc_data_structures/src/stack/common.rs +++ b/crates/oxc_data_structures/src/stack/common.rs @@ -5,7 +5,7 @@ use std::{ slice, }; -use crate::assert_unchecked; +use crate::pointer_ext::PointerExt; use super::StackCapacity; @@ -146,18 +146,12 @@ pub trait StackCommon: StackCapacity { /// * `self.cursor()` must be `>= self.start()`. /// * Byte distance between `self.cursor()` and `self.start()` must be a multiple of `size_of::()`. unsafe fn cursor_offset(&self) -> usize { - // `offset_from` returns offset in units of `T`. + // `offset_from_usize` returns offset in units of `T`. // SAFETY: Caller guarantees `cursor` and `start` are derived from same pointer. // This implies that both pointers are always within bounds of a single allocation. // Caller guarantees `cursor >= start`. // Caller guarantees distance between pointers is a multiple of `size_of::()`. - // `assert_unchecked!` is to help compiler to optimize. - // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr - #[expect(clippy::cast_sign_loss)] - unsafe { - assert_unchecked!(self.cursor() >= self.start()); - self.cursor().offset_from(self.start()) as usize - } + unsafe { self.cursor().offset_from_usize(self.start()) } } /// Get capacity. @@ -168,13 +162,7 @@ pub trait StackCommon: StackCapacity { // * `start` and `end` are both within bounds of a single allocation. // * `end` is always >= `start`. // * Distance between `start` and `end` is always a multiple of `size_of::()`. - // `assert_unchecked!` is to help compiler to optimize. - // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr - #[expect(clippy::cast_sign_loss)] - unsafe { - assert_unchecked!(self.end() >= self.start()); - self.end().offset_from(self.start()) as usize - } + unsafe { self.end().offset_from_usize(self.start()) } } /// Get capacity in bytes. @@ -185,13 +173,7 @@ pub trait StackCommon: StackCapacity { // * `start` and `end` are both within bounds of a single allocation. // * `end` is always >= `start`. // * Distance between `start` and `end` is always a multiple of `size_of::()`. - // `assert_unchecked!` is to help compiler to optimize. - // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr - #[expect(clippy::cast_sign_loss)] - unsafe { - assert_unchecked!(self.end() >= self.start()); - self.end().byte_offset_from(self.start()) as usize - } + unsafe { self.end().byte_offset_from_usize(self.start()) } } /// Get contents of stack as a slice `&[T]`. diff --git a/crates/oxc_estree/Cargo.toml b/crates/oxc_estree/Cargo.toml index 06c127fb24ec1..20667b81562fb 100644 --- a/crates/oxc_estree/Cargo.toml +++ b/crates/oxc_estree/Cargo.toml @@ -19,7 +19,7 @@ workspace = true doctest = false [dependencies] -oxc_data_structures = { workspace = true, features = ["code_buffer", "stack"], optional = true } +oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "stack"], optional = true } itoa = { workspace = true, optional = true } ryu-js = { workspace = true, optional = true } diff --git a/crates/oxc_estree/src/serialize/strings.rs b/crates/oxc_estree/src/serialize/strings.rs index 8c11df54c0fe9..2e3150282e2f4 100644 --- a/crates/oxc_estree/src/serialize/strings.rs +++ b/crates/oxc_estree/src/serialize/strings.rs @@ -1,6 +1,6 @@ use std::slice; -use oxc_data_structures::code_buffer::CodeBuffer; +use oxc_data_structures::{code_buffer::CodeBuffer, pointer_ext::PointerExt}; use super::{ESTree, Serializer}; @@ -119,7 +119,6 @@ const fn create_table(lo: Escape) -> [Escape; 256] { // `#[inline(always)]` because this is a hot path, and to make compiler remove the code // for handling lone surrogates when outputting a normal string (the common case). #[inline(always)] -#[expect(clippy::cast_sign_loss)] fn write_str(s: &str, table: &[Escape; 256], buffer: &mut CodeBuffer) { buffer.print_ascii_byte(b'"'); @@ -194,7 +193,7 @@ fn write_str(s: &str, table: &[Escape; 256], buffer: &mut CodeBuffer) { // `chunk_start_ptr` is after a previous byte so must be `<= current_ptr`. unsafe { let current_ptr = iter.as_slice().as_ptr(); - let len = current_ptr.offset_from(chunk_start_ptr) as usize; + let len = current_ptr.offset_from_usize(chunk_start_ptr); let chunk = slice::from_raw_parts(chunk_start_ptr, len); buffer.print_bytes_unchecked(chunk); } @@ -252,7 +251,7 @@ fn write_str(s: &str, table: &[Escape; 256], buffer: &mut CodeBuffer) { // an ASCII character, so must also be on a UTF-8 character boundary, and in bounds. // `chunk_start_ptr` is after a previous byte so must be `<= current_ptr`. unsafe { - let len = current_ptr.offset_from(chunk_start_ptr) as usize; + let len = current_ptr.offset_from_usize(chunk_start_ptr); let chunk = slice::from_raw_parts(chunk_start_ptr, len); buffer.print_bytes_unchecked(chunk); } @@ -275,7 +274,7 @@ fn write_str(s: &str, table: &[Escape; 256], buffer: &mut CodeBuffer) { // an ASCII character, so must be on a UTF-8 character boundary, and in bounds. // `chunk_start_ptr` is after a previous byte so must be `<= end_ptr`. unsafe { - let len = end_ptr.offset_from(chunk_start_ptr) as usize; + let len = end_ptr.offset_from_usize(chunk_start_ptr); let chunk = slice::from_raw_parts(chunk_start_ptr, len); buffer.print_bytes_unchecked(chunk); }