diff --git a/crates/oxc_allocator/src/allocator.rs b/crates/oxc_allocator/src/allocator.rs index 5b4361306cfe8..e75a14b65a60d 100644 --- a/crates/oxc_allocator/src/allocator.rs +++ b/crates/oxc_allocator/src/allocator.rs @@ -1,5 +1,9 @@ +use std::{alloc::Layout, ptr, slice, str}; + use bumpalo::Bump; +use oxc_data_structures::assert_unchecked; + /// A bump-allocated memory arena. /// /// # Anatomy of an Allocator @@ -288,6 +292,109 @@ impl Allocator { self.bump.alloc_str(src) } + /// Create new `&str` from a fixed-size array of `&str`s concatenated together, + /// allocated in the given `allocator`. + /// + /// # Panics + /// + /// Panics if the sum of length of all strings exceeds `isize::MAX`. + /// + /// # Example + /// ``` + /// use oxc_allocator::Allocator; + /// + /// let allocator = Allocator::new(); + /// let s = allocator.alloc_concat_strs_array(["hello", " ", "world", "!"]); + /// assert_eq!(s, "hello world!"); + /// ``` + // `#[inline(always)]` because want compiler to be able to remove checked addition where some of + // `strings` are statically known. + #[expect(clippy::inline_always)] + #[inline(always)] + pub fn alloc_concat_strs_array<'a, const N: usize>(&'a self, strings: [&str; N]) -> &'a str { + // Calculate total length of all the strings concatenated. + // + // We have to use `checked_add` here to guard against additions wrapping around + // if some of the input `&str`s are very long, or there's many of them. + // + // However, `&str`s have max length of `isize::MAX`. + // https://users.rust-lang.org/t/does-str-reliably-have-length-isize-max/126777 + // Use `assert_unchecked!` to communicate this invariant to compiler, which allows it to + // optimize out the overflow checks where some of `strings` are static, so their size is known. + // + // e.g. 
`allocator.from_strs_array_in(["__vite_ssr_import_", str, "__"])`, for example, + // requires no checks at all, because the static parts have total length of 20 bytes, + // and `str` has max length of `isize::MAX`. `isize::MAX as usize + 20` cannot overflow `usize`. + // Compiler can see that, and removes the overflow check. + // https://godbolt.org/z/MGh44Yz5d + #[expect(clippy::checked_conversions)] + let total_len = strings.iter().fold(0usize, |total_len, s| { + let len = s.len(); + // SAFETY: `&str`s have maximum length of `isize::MAX` + unsafe { assert_unchecked!(len <= (isize::MAX as usize)) }; + total_len.checked_add(len).unwrap() + }); + assert!( + isize::try_from(total_len).is_ok(), + "attempted to create a string longer than `isize::MAX` bytes" + ); + + // Create actual `&str` in a separate function, to ensure that `alloc_concat_strs_array` + // is inlined, so that compiler has knowledge to remove the overflow checks above. + // When some of `strings` are static, this function is usually only a few instructions. + // Compiler can choose whether or not to inline `alloc_concat_strs_array_with_total_len_in`. + // SAFETY: `total_len` has been calculated correctly above. + // `total_len` is `<= isize::MAX`. + unsafe { self.alloc_concat_strs_array_with_total_len_in(strings, total_len) } + } + + /// Create a new `&str` from a fixed-size array of `&str`s concatenated together, + /// allocated in the given `allocator`, with provided `total_len`. + /// + /// # SAFETY + /// * `total_len` must be the total length of all `strings` concatenated. + /// * `total_len` must be `<= isize::MAX`. + unsafe fn alloc_concat_strs_array_with_total_len_in<'a, const N: usize>( + &'a self, + strings: [&str; N], + total_len: usize, + ) -> &'a str { + if total_len == 0 { + return ""; + } + + // Allocate `total_len` bytes. + // SAFETY: Caller guarantees `total_len <= isize::MAX`. 
+ let layout = unsafe { Layout::from_size_align_unchecked(total_len, 1) }; + let start_ptr = self.bump().alloc_layout(layout); + + let mut end_ptr = start_ptr; + for str in strings { + let src_ptr = str.as_ptr(); + let len = str.len(); + + // SAFETY: + // `src` is obtained from a `&str` with length `len`, so is valid for reading `len` bytes. + // `end_ptr` is within bounds of the allocation. So is `end_ptr + len`. + // `u8` has no alignment requirements, so `src_ptr` and `end_ptr` are sufficiently aligned. + // No overlapping, because we're copying from an existing `&str` to a newly allocated buffer. + unsafe { ptr::copy_nonoverlapping(src_ptr, end_ptr.as_ptr(), len) }; + + // SAFETY: We allocated sufficient capacity for all the strings concatenated. + // So `end_ptr.add(len)` cannot go out of bounds. + end_ptr = unsafe { end_ptr.add(len) }; + } + + debug_assert_eq!(end_ptr.as_ptr() as usize - start_ptr.as_ptr() as usize, total_len); + + // SAFETY: We have allocated and filled `total_len` bytes starting at `start_ptr`. + // Concatenating multiple `&str`s results in a valid UTF-8 string. + unsafe { + let slice = slice::from_raw_parts(start_ptr.as_ptr(), total_len); + str::from_utf8_unchecked(slice) + } + } + /// Reset this allocator. 
/// A single-element array yields a copy of that element.
#[test]
fn string_from_array_len_1() {
    let arena = Allocator::default();
    assert_eq!(arena.alloc_concat_strs_array(["hello"]), "hello");
}

/// Two string literals are joined with no separator.
#[test]
fn string_from_array_len_2() {
    let arena = Allocator::default();
    let joined = arena.alloc_concat_strs_array(["hello", "world!"]);
    assert_eq!(joined, "helloworld!");
}

/// Mixes string literals with a slice borrowed from a heap-allocated `String`.
#[test]
fn string_from_array_len_3() {
    let arena = Allocator::default();
    let owned = std::string::String::from("world");
    let prefix = "hello";
    let joined = arena.alloc_concat_strs_array([prefix, owned.as_str(), "!"]);
    assert_eq!(joined, "helloworld!");
}

/// An empty array yields the empty string.
#[test]
fn string_from_empty_array() {
    let arena = Allocator::default();
    assert_eq!(arena.alloc_concat_strs_array([]), "");
}

/// An array containing only empty strings yields the empty string.
#[test]
fn string_from_array_of_empty_strs() {
    let arena = Allocator::default();
    let joined = arena.alloc_concat_strs_array(["", "", ""]);
    assert_eq!(joined, "");
}

/// Empty strings mixed in with non-empty ones contribute nothing to the result.
#[test]
fn string_from_array_containing_some_empty_strs() {
    let arena = Allocator::default();
    let joined = arena.alloc_concat_strs_array(["", "hello", ""]);
    assert_eq!(joined, "hello");
}
+ /// /// # Examples /// ``` /// use oxc_allocator::{Allocator, String}; diff --git a/crates/oxc_ast/src/ast_builder_impl.rs b/crates/oxc_ast/src/ast_builder_impl.rs index b10dba4692dc7..b032dc165bbd0 100644 --- a/crates/oxc_ast/src/ast_builder_impl.rs +++ b/crates/oxc_ast/src/ast_builder_impl.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; -use oxc_allocator::{Allocator, AllocatorAccessor, Box, FromIn, IntoIn, String, Vec}; +use oxc_allocator::{Allocator, AllocatorAccessor, Box, FromIn, IntoIn, Vec}; use oxc_span::{Atom, SPAN, Span}; use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId}; @@ -90,9 +90,8 @@ impl<'a> AstBuilder<'a> { /// Allocate an [`Atom`] from an array of string slices. #[inline] - pub fn atom_from_strs_array(self, array: [&str; N]) -> Atom<'a> { - let string = String::from_strs_array_in(array, self.allocator); - Atom::from(string) + pub fn atom_from_strs_array(self, strings: [&str; N]) -> Atom<'a> { + Atom::from_strs_array_in(strings, self.allocator) } /// Convert a [`Cow<'a, str>`] to an [`Atom<'a>`]. diff --git a/crates/oxc_span/src/atom.rs b/crates/oxc_span/src/atom.rs index b7094b81d9e7c..2903bfa7456a6 100644 --- a/crates/oxc_span/src/atom.rs +++ b/crates/oxc_span/src/atom.rs @@ -61,6 +61,33 @@ impl<'a> Atom<'a> { pub fn to_compact_str(self) -> CompactStr { CompactStr::new(self.as_str()) } + + /// Create new [`Atom`] from a fixed-size array of `&str`s concatenated together, + /// allocated in the given `allocator`. + /// + /// # Panics + /// + /// Panics if the sum of length of all strings exceeds `isize::MAX`. + /// + /// # Example + /// ``` + /// use oxc_allocator::Allocator; + /// use oxc_span::Atom; + /// + /// let allocator = Allocator::new(); + /// let s = Atom::from_strs_array_in(["hello", " ", "world", "!"], &allocator); + /// assert_eq!(s.as_str(), "hello world!"); + /// ``` + // `#[inline(always)]` because want compiler to be able to optimize where some of `strings` + // are statically known. 
See `Allocator::alloc_concat_strs_array`. + #[expect(clippy::inline_always)] + #[inline(always)] + pub fn from_strs_array_in( + strings: [&str; N], + allocator: &'a Allocator, + ) -> Atom<'a> { + Self::from(allocator.alloc_concat_strs_array(strings)) + } } impl<'new_alloc> CloneIn<'new_alloc> for Atom<'_> { diff --git a/crates/oxc_transformer/src/typescript/rewrite_extensions.rs b/crates/oxc_transformer/src/typescript/rewrite_extensions.rs index 70512724c84ad..e7153b0ee8f5c 100644 --- a/crates/oxc_transformer/src/typescript/rewrite_extensions.rs +++ b/crates/oxc_transformer/src/typescript/rewrite_extensions.rs @@ -5,7 +5,6 @@ //! //! Based on Babel's [plugin-rewrite-ts-imports](https://github.com/babel/babel/blob/3bcfee232506a4cebe410f02042fb0f0adeeb0b1/packages/babel-preset-typescript/src/plugin-rewrite-ts-imports.ts) -use oxc_allocator::String as ArenaString; use oxc_ast::ast::{ ExportAllDeclaration, ExportNamedDeclaration, ImportDeclaration, StringLiteral, }; @@ -43,9 +42,7 @@ impl TypeScriptRewriteExtensions { source.value = if self.mode.is_remove() { Atom::from(without_extension) } else { - let new_value = - ArenaString::from_strs_array_in([without_extension, replace], ctx.ast.allocator); - Atom::from(new_value) + ctx.ast.atom_from_strs_array([without_extension, replace]) }; source.raw = None; } diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index 58fb968ede694..81d682bc8d83c 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -285,8 +285,8 @@ impl<'a> UidGenerator<'a> { let mut buffer = ItoaBuffer::new(); let digits = buffer.format(uid_name.postfix); - let uid = if uid_name.underscore_count == 1 { - ArenaString::from_strs_array_in(["_", base, digits], self.allocator) + if uid_name.underscore_count == 1 { + Atom::from_strs_array_in(["_", base, digits], self.allocator) } else { let mut uid = ArenaString::with_capacity_in( uid_name.underscore_count as usize + base.len() 
+ digits.len(), @@ -295,12 +295,10 @@ impl<'a> UidGenerator<'a> { uid.extend(iter::repeat_n("_", uid_name.underscore_count as usize)); uid.push_str(base); uid.push_str(digits); - uid - }; - - Atom::from(uid) + Atom::from(uid) + } } else { - let uid = Atom::from(ArenaString::from_strs_array_in(["_", base], self.allocator)); + let uid = Atom::from_strs_array_in(["_", base], self.allocator); // SAFETY: String starts with `_`, so trimming off that byte leaves a valid UTF-8 string let base = unsafe { uid.as_str().get_unchecked(1..) }; self.names.insert(base, UidName { underscore_count: 1, postfix: 1 });