diff --git a/crates/oxc_allocator/src/convert.rs b/crates/oxc_allocator/src/convert.rs
index 2f9122b817a0e..7a0f54666e757 100644
--- a/crates/oxc_allocator/src/convert.rs
+++ b/crates/oxc_allocator/src/convert.rs
@@ -49,7 +49,7 @@ impl<'a> FromIn<'a, String> for crate::String<'a> {
 impl<'a> FromIn<'a, String> for &'a str {
     #[inline(always)]
     fn from_in(value: String, allocator: &'a Allocator) -> Self {
-        crate::String::from_str_in(value.as_str(), allocator).into_bump_str()
+        allocator.alloc_str(value.as_str())
     }
 }
 
diff --git a/crates/oxc_allocator/src/lib.rs b/crates/oxc_allocator/src/lib.rs
index c2b11bac5d0be..a0893175dec03 100644
--- a/crates/oxc_allocator/src/lib.rs
+++ b/crates/oxc_allocator/src/lib.rs
@@ -44,7 +44,6 @@ use std::{
     ops::{Deref, DerefMut},
 };
 
-pub use bumpalo::collections::String;
 use bumpalo::Bump;
 
 mod address;
@@ -53,6 +52,7 @@ mod boxed;
 mod clone_in;
 mod convert;
 pub mod hash_map;
+pub mod string;
 mod vec;
 
 pub use address::{Address, GetAddress};
@@ -60,6 +60,7 @@ pub use boxed::Box;
 pub use clone_in::CloneIn;
 pub use convert::{FromIn, IntoIn};
 pub use hash_map::HashMap;
+pub use string::String;
 pub use vec::Vec;
 
 /// A bump-allocated memory arena based on [bumpalo].
diff --git a/crates/oxc_allocator/src/string.rs b/crates/oxc_allocator/src/string.rs
new file mode 100644
index 0000000000000..e87a4f1d148df
--- /dev/null
+++ b/crates/oxc_allocator/src/string.rs
@@ -0,0 +1,249 @@
+//! Arena String.
+//!
+//! See [`String`] for more details.
+
+// All methods which just delegate to `bumpalo::collections::String` methods are marked `#[inline(always)]`
+#![expect(clippy::inline_always)]
+
+use std::{
+    fmt::{self, Debug, Display},
+    hash::{Hash, Hasher},
+    mem::ManuallyDrop,
+    ops::{Deref, DerefMut},
+};
+
+use bumpalo::collections::String as BumpaloString;
+use simdutf8::basic::from_utf8;
+pub use simdutf8::basic::Utf8Error;
+
+use crate::{Allocator, Vec};
+
+/// Arena String.
+///
+/// UTF-8 encoded, growable string.
Identical to [`std::string::String`] except that it stores
+/// string contents in an arena allocator.
+#[derive(PartialOrd, Eq, Ord)]
+pub struct String<'alloc>(BumpaloString<'alloc>);
+
+impl<'alloc> String<'alloc> {
+    /// Creates a new empty [`String`].
+    ///
+    /// Given that the `String` is empty, this will not allocate any initial
+    /// buffer. While that means that this initial operation is very
+    /// inexpensive, it may cause excessive allocation later when you add
+    /// data. If you have an idea of how much data the `String` will hold,
+    /// consider the [`with_capacity_in`] method to prevent excessive
+    /// re-allocation.
+    ///
+    /// [`with_capacity_in`]: String::with_capacity_in
+    #[inline(always)]
+    pub fn new_in(allocator: &'alloc Allocator) -> String<'alloc> {
+        Self(BumpaloString::new_in(allocator))
+    }
+
+    /// Creates a new empty [`String`] with the specified capacity.
+    ///
+    /// `String`s have an internal buffer to hold their data. The capacity is
+    /// the length of that buffer, and can be queried with the `capacity`
+    /// method. This method creates an empty `String`, but one with an initial
+    /// buffer that can hold `capacity` bytes. This is useful when you may be
+    /// appending a bunch of data to the `String`, reducing the number of
+    /// reallocations it needs to do.
+    ///
+    /// If the given capacity is `0`, no allocation will occur, and this method
+    /// is identical to the [`new_in`] method.
+    ///
+    /// [`capacity`]: String::capacity
+    /// [`new_in`]: String::new_in
+    #[inline(always)]
+    pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> String<'alloc> {
+        Self(BumpaloString::with_capacity_in(capacity, allocator))
+    }
+
+    /// Construct a new [`String`] from a string slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use oxc_allocator::{Allocator, String};
+    ///
+    /// let allocator = Allocator::default();
+    ///
+    /// let s = String::from_str_in("hello", &allocator);
+    /// assert_eq!(s, "hello");
+    /// ```
+    #[inline(always)]
+    pub fn from_str_in(s: &str, allocator: &'alloc Allocator) -> String<'alloc> {
+        Self(BumpaloString::from_str_in(s, allocator))
+    }
+
+    /// Convert `Vec<u8>` into [`String`].
+    ///
+    /// # Errors
+    /// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
+    pub fn from_utf8(bytes: Vec<'alloc, u8>) -> Result<String<'alloc>, Utf8Error> {
+        // Check vec comprises a valid UTF-8 string.
+        from_utf8(&bytes)?;
+        // SAFETY: We just checked it's a valid UTF-8 string
+        let s = unsafe { Self::from_utf8_unchecked(bytes) };
+        Ok(s)
+    }
+
+    /// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
+    ///
+    /// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation.
+    ///
+    /// # SAFETY
+    /// Caller must ensure this `Vec` comprises a valid UTF-8 string.
+    //
+    // `#[inline(always)]` because this is a no-op at runtime
+    #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
+    #[inline(always)]
+    pub unsafe fn from_utf8_unchecked(bytes: Vec<'alloc, u8>) -> String<'alloc> {
+        // Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
+        // and our inner `Vec` type is `allocator_api2::vec::Vec`.
+        // SAFETY: Conversion is safe because both types store data in arena in same way.
+        // Lifetime of returned `String` is same as lifetime of original `Vec`.
+        let inner = ManuallyDrop::into_inner(bytes.0);
+        let (ptr, len, capacity, bump) = inner.into_raw_parts_with_alloc();
+        Self(BumpaloString::from_raw_parts_in(ptr, len, capacity, bump))
+    }
+
+    /// Creates a new [`String`] from a length, capacity, and pointer.
+    ///
+    /// # SAFETY
+    ///
+    /// This is highly unsafe, due to the number of invariants that aren't checked:
+    ///
+    /// * The memory at `buf` needs to have been previously allocated by the same [`Allocator`].
+    /// * `length` needs to be less than or equal to `capacity`.
+    /// * `capacity` needs to be the correct value.
+    ///
+    /// Violating these may cause problems like corrupting the allocator's internal data structures.
+    ///
+    /// The ownership of `buf` is effectively transferred to the `String` which may then deallocate,
+    /// reallocate or change the contents of memory pointed to by the pointer at will. Ensure that
+    /// nothing else uses the pointer after calling this function.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use std::mem;
+    /// use oxc_allocator::{Allocator, String};
+    ///
+    /// let allocator = Allocator::default();
+    ///
+    /// unsafe {
+    ///     let mut s = String::from_str_in("hello", &allocator);
+    ///     let ptr = s.as_mut_ptr();
+    ///     let len = s.len();
+    ///     let capacity = s.capacity();
+    ///
+    ///     mem::forget(s);
+    ///
+    ///     let s = String::from_raw_parts_in(ptr, len, capacity, &allocator);
+    ///
+    ///     assert_eq!(s, "hello");
+    /// }
+    /// ```
+    #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
+    #[inline(always)]
+    pub unsafe fn from_raw_parts_in(
+        buf: *mut u8,
+        length: usize,
+        capacity: usize,
+        allocator: &'alloc Allocator,
+    ) -> String<'alloc> {
+        // SAFETY: Safety conditions of this method are the same as `BumpaloString`'s method
+        Self(BumpaloString::from_raw_parts_in(buf, length, capacity, allocator))
+    }
+
+    /// Convert this `String<'alloc>` into an `&'alloc str`. This is analogous to
+    /// [`std::string::String::into_boxed_str`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use oxc_allocator::{Allocator, String};
+    ///
+    /// let allocator = Allocator::default();
+    ///
+    /// let s = String::from_str_in("foo", &allocator);
+    /// assert_eq!(s.into_bump_str(), "foo");
+    /// ```
+    #[inline(always)]
+    pub fn into_bump_str(self) -> &'alloc str {
+        self.0.into_bump_str()
+    }
+}
+
+// Provide access to all of `bumpalo::collections::String`'s methods via `Deref` / `DerefMut`
+impl<'alloc> Deref for String<'alloc> {
+    type Target = BumpaloString<'alloc>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<'alloc> DerefMut for String<'alloc> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut BumpaloString<'alloc> {
+        &mut self.0
+    }
+}
+
+impl PartialEq for String<'_> {
+    #[inline]
+    fn eq(&self, other: &String) -> bool {
+        PartialEq::eq(&self[..], &other[..])
+    }
+}
+
+// `impl_eq!` macro copied from `bumpalo`; generates `PartialEq` in both directions for a type pair
+macro_rules! impl_eq {
+    ($lhs:ty, $rhs: ty) => {
+        impl<'a, 'alloc> PartialEq<$rhs> for $lhs {
+            #[inline]
+            fn eq(&self, other: &$rhs) -> bool {
+                PartialEq::eq(&self[..], &other[..])
+            }
+        }
+
+        impl<'a, 'alloc> PartialEq<$lhs> for $rhs {
+            #[inline]
+            fn eq(&self, other: &$lhs) -> bool {
+                PartialEq::eq(&self[..], &other[..])
+            }
+        }
+    };
+}
+
+impl_eq! { String<'alloc>, str }
+impl_eq! { String<'alloc>, &'a str }
+impl_eq! { std::borrow::Cow<'a, str>, String<'alloc> }
+impl_eq!
{ std::string::String, String<'alloc> }
+
+impl Display for String<'_> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        Display::fmt(self.as_str(), f)
+    }
+}
+
+impl Debug for String<'_> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        Debug::fmt(self.as_str(), f)
+    }
+}
+
+impl Hash for String<'_> {
+    #[inline]
+    fn hash<H: Hasher>(&self, hasher: &mut H) {
+        self.as_str().hash(hasher);
+    }
+}
diff --git a/crates/oxc_allocator/src/vec.rs b/crates/oxc_allocator/src/vec.rs
index cafc79ab0f0d3..214e5bb1122bc 100644
--- a/crates/oxc_allocator/src/vec.rs
+++ b/crates/oxc_allocator/src/vec.rs
@@ -19,7 +19,7 @@ use allocator_api2::vec::Vec as InnerVec;
 use bumpalo::Bump;
 #[cfg(any(feature = "serialize", test))]
 use serde::{ser::SerializeSeq, Serialize, Serializer};
-use simdutf8::basic::{from_utf8, Utf8Error};
+use simdutf8::basic::Utf8Error;
 
 use crate::{Allocator, Box, String};
 
@@ -32,7 +32,7 @@ use crate::{Allocator, Box, String};
 /// Note: This is not a soundness issue, as Rust does not support relying on `drop`
 /// being called to guarantee soundness.
 #[derive(PartialEq, Eq)]
-pub struct Vec<'alloc, T>(ManuallyDrop<InnerVec<T, &'alloc Bump>>);
+pub struct Vec<'alloc, T>(pub(crate) ManuallyDrop<InnerVec<T, &'alloc Bump>>);
 
 /// SAFETY: Not actually safe, but for enabling `Send` for downstream crates.
 unsafe impl<T> Send for Vec<'_, T> {}
@@ -190,16 +190,12 @@ impl<'alloc, T> Vec<'alloc, T> {
 }
 
 impl<'alloc> Vec<'alloc, u8> {
-    /// Convert `Vec<u8>` into `String`.
+    /// Convert `Vec<u8>` into [`String`].
     ///
     /// # Errors
     /// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
     pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
-        // Check vec comprises a valid UTF-8 string.
-        from_utf8(&self.0)?;
-        // SAFETY: We just checked it's a valid UTF-8 string
-        let s = unsafe { self.into_string_unchecked() };
-        Ok(s)
+        String::from_utf8(self)
     }
 
     /// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
@@ -211,13 +207,8 @@ impl<'alloc> Vec<'alloc, u8> {
     #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
     #[inline(always)] // `#[inline(always)]` because this is a no-op at runtime
     pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
-        // Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
-        // and our inner `Vec` type is `allocator_api2::vec::Vec`.
-        // SAFETY: Conversion is safe because both types store data in arena in same way.
-        // Lifetime of returned `String` is same as lifetime of original `Vec`.
-        let inner = ManuallyDrop::into_inner(self.0);
-        let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
-        String::from_raw_parts_in(ptr, len, cap, bump)
+        // SAFETY: Caller guarantees `self` is valid UTF-8, as `String::from_utf8_unchecked` requires.
+        String::from_utf8_unchecked(self)
     }
 }
 
diff --git a/crates/oxc_ast/src/ast_builder_impl.rs b/crates/oxc_ast/src/ast_builder_impl.rs
index 3b29825da9bbd..3b77a0de4025b 100644
--- a/crates/oxc_ast/src/ast_builder_impl.rs
+++ b/crates/oxc_ast/src/ast_builder_impl.rs
@@ -8,7 +8,7 @@
 
 use std::{borrow::Cow, mem};
 
-use oxc_allocator::{Allocator, Box, FromIn, String, Vec};
+use oxc_allocator::{Allocator, Box, FromIn, Vec};
 use oxc_span::{Atom, Span, SPAN};
 use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId};
 
@@ -78,7 +78,7 @@ impl<'a> AstBuilder<'a> {
     /// in the heap.
     #[inline]
     pub fn str(self, value: &str) -> &'a str {
-        String::from_str_in(value, self.allocator).into_bump_str()
+        self.allocator.alloc_str(value)
     }
 
     /// Allocate an [`Atom`] from a string slice.
diff --git a/crates/oxc_prettier/src/macros.rs b/crates/oxc_prettier/src/macros.rs
index 577b55e0a5144..6c15fbf069ea8 100644
--- a/crates/oxc_prettier/src/macros.rs
+++ b/crates/oxc_prettier/src/macros.rs
@@ -47,7 +47,7 @@ macro_rules! text {
 #[macro_export]
 macro_rules!
dynamic_text {
     ($p:ident, $str:expr) => {{
-        let s = oxc_allocator::String::from_str_in($str, $p.allocator).into_bump_str();
+        let s = $p.allocator.alloc_str($str);
         $crate::ir::Doc::Str(s)
     }};
 }
diff --git a/crates/oxc_span/src/atom.rs b/crates/oxc_span/src/atom.rs
index 870164d972087..3bba8d6b45777 100644
--- a/crates/oxc_span/src/atom.rs
+++ b/crates/oxc_span/src/atom.rs
@@ -79,7 +79,7 @@ impl<'alloc> FromIn<'alloc, &Atom<'alloc>> for Atom<'alloc> {
 
 impl<'alloc> FromIn<'alloc, &str> for Atom<'alloc> {
     fn from_in(s: &str, allocator: &'alloc Allocator) -> Self {
-        Self::from(oxc_allocator::String::from_str_in(s, allocator))
+        Self::from(&*allocator.alloc_str(s)) // `&*`: presumably reborrows `&mut str` as `&str`
     }
 }
 