Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions crates/oxc_allocator/src/allocator.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use std::{alloc::Layout, ptr, slice, str};

use bumpalo::Bump;

use oxc_data_structures::assert_unchecked;

/// A bump-allocated memory arena.
///
/// # Anatomy of an Allocator
Expand Down Expand Up @@ -288,6 +292,109 @@ impl Allocator {
self.bump.alloc_str(src)
}

/// Create a new `&str` from a fixed-size array of `&str`s concatenated together,
/// allocated in this allocator.
///
/// The total length is computed here (with overflow checks), and the actual allocation and
/// copying is delegated to `alloc_concat_strs_array_with_total_len_in`.
///
/// # Panics
///
/// Panics if the sum of the lengths of all the strings exceeds `isize::MAX`.
///
/// # Example
/// ```
/// use oxc_allocator::Allocator;
///
/// let allocator = Allocator::new();
/// let s = allocator.alloc_concat_strs_array(["hello", " ", "world", "!"]);
/// assert_eq!(s, "hello world!");
/// ```
// `#[inline(always)]` because we want the compiler to be able to remove the checked additions
// where some of `strings` are statically known.
#[expect(clippy::inline_always)]
#[inline(always)]
pub fn alloc_concat_strs_array<'a, const N: usize>(&'a self, strings: [&str; N]) -> &'a str {
// Calculate total length of all the strings concatenated.
//
// We have to use `checked_add` here to guard against additions wrapping around
// if some of the input `&str`s are very long, or there are many of them.
//
// However, `&str`s have a max length of `isize::MAX`.
// https://users.rust-lang.org/t/does-str-reliably-have-length-isize-max/126777
// Use `assert_unchecked!` to communicate this invariant to the compiler, which allows it to
// optimize out the overflow checks where some of `strings` are static, so their size is known.
//
// e.g. `allocator.alloc_concat_strs_array(["__vite_ssr_import_", str, "__"])` requires
// no checks at all, because the static parts have a total length of 20 bytes,
// and `str` has a max length of `isize::MAX`. `isize::MAX as usize + 20` cannot overflow `usize`.
// The compiler can see that, and removes the overflow check.
// https://godbolt.org/z/MGh44Yz5d
#[expect(clippy::checked_conversions)]
let total_len = strings.iter().fold(0usize, |total_len, s| {
let len = s.len();
// SAFETY: `&str`s have maximum length of `isize::MAX`
unsafe { assert_unchecked!(len <= (isize::MAX as usize)) };
// Panics if the running total overflows `usize`.
total_len.checked_add(len).unwrap()
});
// A total that fits in `usize` can still exceed `isize::MAX`, which is the maximum length
// of a `&str`, so check that explicitly.
assert!(
isize::try_from(total_len).is_ok(),
"attempted to create a string longer than `isize::MAX` bytes"
);

// Create the actual `&str` in a separate function, to ensure that `alloc_concat_strs_array`
// is inlined, so that the compiler has the knowledge to remove the overflow checks above.
// When some of `strings` are static, this function is usually only a few instructions.
// The compiler can choose whether or not to inline `alloc_concat_strs_array_with_total_len_in`.
// SAFETY: `total_len` has been calculated correctly above.
// `total_len` is `<= isize::MAX`.
unsafe { self.alloc_concat_strs_array_with_total_len_in(strings, total_len) }
}

/// Create a new `&str` from a fixed-size array of `&str`s concatenated together,
/// allocated in this allocator, with the provided `total_len`.
///
/// Returns a static empty string, without allocating, if `total_len` is 0.
///
/// # Safety
/// * `total_len` must be the total length of all `strings` concatenated.
/// * `total_len` must be `<= isize::MAX`.
unsafe fn alloc_concat_strs_array_with_total_len_in<'a, const N: usize>(
&'a self,
strings: [&str; N],
total_len: usize,
) -> &'a str {
// Nothing to copy, so skip the allocation entirely.
if total_len == 0 {
return "";
}

// Allocate `total_len` bytes.
// SAFETY: Caller guarantees `total_len <= isize::MAX`.
// Alignment of 1 is a non-zero power of 2, as `Layout` requires.
let layout = unsafe { Layout::from_size_align_unchecked(total_len, 1) };
let start_ptr = self.bump().alloc_layout(layout);

// `end_ptr` tracks the position in the new allocation where the next string is written.
let mut end_ptr = start_ptr;
for str in strings {
let src_ptr = str.as_ptr();
let len = str.len();

// SAFETY:
// `src_ptr` is obtained from a `&str` with length `len`, so is valid for reading `len` bytes.
// `end_ptr` is within bounds of the allocation. So is `end_ptr + len`.
// `u8` has no alignment requirements, so `src_ptr` and `end_ptr` are sufficiently aligned.
// No overlapping, because we're copying from an existing `&str` to a newly allocated buffer.
unsafe { ptr::copy_nonoverlapping(src_ptr, end_ptr.as_ptr(), len) };

// SAFETY: We allocated sufficient capacity for all the strings concatenated.
// So `end_ptr.add(len)` cannot go out of bounds.
end_ptr = unsafe { end_ptr.add(len) };
}

// In debug builds, check that the number of bytes written matches `total_len`
// (i.e. the caller upheld the first safety requirement).
debug_assert_eq!(end_ptr.as_ptr() as usize - start_ptr.as_ptr() as usize, total_len);

// SAFETY: We have allocated and filled `total_len` bytes starting at `start_ptr`.
// Concatenating multiple `&str`s results in a valid UTF-8 string.
unsafe {
let slice = slice::from_raw_parts(start_ptr.as_ptr(), total_len);
str::from_utf8_unchecked(slice)
}
}

/// Reset this allocator.
///
/// Performs mass deallocation on everything allocated in this arena by resetting the pointer
Expand Down Expand Up @@ -469,4 +576,48 @@ mod test {
}
allocator.reset();
}

#[test]
fn string_from_array_len_1() {
    // A single-element array yields that element unchanged.
    let arena = Allocator::default();
    let parts = ["hello"];
    let joined = arena.alloc_concat_strs_array(parts);
    assert_eq!(joined, "hello");
}

#[test]
fn string_from_array_len_2() {
    // Two elements are joined back-to-back, with no separator inserted.
    let arena = Allocator::default();
    let parts = ["hello", "world!"];
    let joined = arena.alloc_concat_strs_array(parts);
    assert_eq!(joined, "helloworld!");
}

#[test]
fn string_from_array_len_3() {
    // Mixes a string literal held in a local, a borrow of an owned `String`, and a literal.
    let arena = Allocator::default();
    let world = std::string::String::from("world");
    let hello = "hello";
    let joined = arena.alloc_concat_strs_array([hello, world.as_str(), "!"]);
    assert_eq!(joined, "helloworld!");
}

#[test]
fn string_from_empty_array() {
    // A zero-length array produces the empty string.
    let arena = Allocator::default();
    let parts: [&str; 0] = [];
    let joined = arena.alloc_concat_strs_array(parts);
    assert_eq!(joined, "");
}

#[test]
fn string_from_array_of_empty_strs() {
    // A non-empty array whose elements are all empty still produces the empty string.
    let arena = Allocator::default();
    let parts = ["", "", ""];
    let joined = arena.alloc_concat_strs_array(parts);
    assert_eq!(joined, "");
}

#[test]
fn string_from_array_containing_some_empty_strs() {
    // Empty elements around a non-empty one contribute nothing to the result.
    let arena = Allocator::default();
    let parts = ["", "hello", ""];
    let joined = arena.alloc_concat_strs_array(parts);
    assert_eq!(joined, "hello");
}
}
3 changes: 3 additions & 0 deletions crates/oxc_allocator/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ impl<'alloc> String<'alloc> {
/// Create a new [`String`] from a fixed-size array of `&str`s concatenated together,
/// allocated in the given `allocator`.
///
/// If you're not altering the `String` after this call, and just converting it to an `Atom`,
/// `Atom::from_strs_array_in` may be slightly more efficient.
///
/// # Examples
/// ```
/// use oxc_allocator::{Allocator, String};
Expand Down
7 changes: 3 additions & 4 deletions crates/oxc_ast/src/ast_builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use std::borrow::Cow;

use oxc_allocator::{Allocator, AllocatorAccessor, Box, FromIn, IntoIn, String, Vec};
use oxc_allocator::{Allocator, AllocatorAccessor, Box, FromIn, IntoIn, Vec};
use oxc_span::{Atom, SPAN, Span};
use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId};

Expand Down Expand Up @@ -90,9 +90,8 @@ impl<'a> AstBuilder<'a> {

/// Allocate an [`Atom`] from an array of string slices.
#[inline]
pub fn atom_from_strs_array<const N: usize>(self, array: [&str; N]) -> Atom<'a> {
let string = String::from_strs_array_in(array, self.allocator);
Atom::from(string)
pub fn atom_from_strs_array<const N: usize>(self, strings: [&str; N]) -> Atom<'a> {
Atom::from_strs_array_in(strings, self.allocator)
}

/// Convert a [`Cow<'a, str>`] to an [`Atom<'a>`].
Expand Down
27 changes: 27 additions & 0 deletions crates/oxc_span/src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,33 @@ impl<'a> Atom<'a> {
pub fn to_compact_str(self) -> CompactStr {
CompactStr::new(self.as_str())
}

/// Build an [`Atom`] by concatenating a fixed-size array of `&str`s into a single string
/// allocated in the given `allocator`.
///
/// # Panics
///
/// Panics if the combined length of all the strings exceeds `isize::MAX`.
///
/// # Example
/// ```
/// use oxc_allocator::Allocator;
/// use oxc_span::Atom;
///
/// let allocator = Allocator::new();
/// let s = Atom::from_strs_array_in(["hello", " ", "world", "!"], &allocator);
/// assert_eq!(s.as_str(), "hello world!");
/// ```
// `#[inline(always)]` so that the compiler can optimize the case where some of `strings`
// are statically known. See `Allocator::alloc_concat_strs_array`.
#[expect(clippy::inline_always)]
#[inline(always)]
pub fn from_strs_array_in<const N: usize>(
    strings: [&str; N],
    allocator: &'a Allocator,
) -> Atom<'a> {
    // Concatenate into the arena first, then wrap the resulting `&str` as an `Atom`.
    let concatenated: &'a str = allocator.alloc_concat_strs_array(strings);
    Self::from(concatenated)
}
}

impl<'new_alloc> CloneIn<'new_alloc> for Atom<'_> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
//!
//! Based on Babel's [plugin-rewrite-ts-imports](https://github.com/babel/babel/blob/3bcfee232506a4cebe410f02042fb0f0adeeb0b1/packages/babel-preset-typescript/src/plugin-rewrite-ts-imports.ts)

use oxc_allocator::String as ArenaString;
use oxc_ast::ast::{
ExportAllDeclaration, ExportNamedDeclaration, ImportDeclaration, StringLiteral,
};
Expand Down Expand Up @@ -43,9 +42,7 @@ impl TypeScriptRewriteExtensions {
source.value = if self.mode.is_remove() {
Atom::from(without_extension)
} else {
let new_value =
ArenaString::from_strs_array_in([without_extension, replace], ctx.ast.allocator);
Atom::from(new_value)
ctx.ast.atom_from_strs_array([without_extension, replace])
};
source.raw = None;
}
Expand Down
12 changes: 5 additions & 7 deletions crates/oxc_traverse/src/context/uid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,8 @@ impl<'a> UidGenerator<'a> {
let mut buffer = ItoaBuffer::new();
let digits = buffer.format(uid_name.postfix);

let uid = if uid_name.underscore_count == 1 {
ArenaString::from_strs_array_in(["_", base, digits], self.allocator)
if uid_name.underscore_count == 1 {
Atom::from_strs_array_in(["_", base, digits], self.allocator)
} else {
let mut uid = ArenaString::with_capacity_in(
uid_name.underscore_count as usize + base.len() + digits.len(),
Expand All @@ -295,12 +295,10 @@ impl<'a> UidGenerator<'a> {
uid.extend(iter::repeat_n("_", uid_name.underscore_count as usize));
uid.push_str(base);
uid.push_str(digits);
uid
};

Atom::from(uid)
Atom::from(uid)
}
} else {
let uid = Atom::from(ArenaString::from_strs_array_in(["_", base], self.allocator));
let uid = Atom::from_strs_array_in(["_", base], self.allocator);
// SAFETY: String starts with `_`, so trimming off that byte leaves a valid UTF-8 string
let base = unsafe { uid.as_str().get_unchecked(1..) };
self.names.insert(base, UidName { underscore_count: 1, postfix: 1 });
Expand Down
Loading