Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/oxc_allocator/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl<'a> FromIn<'a, String> for crate::String<'a> {
impl<'a> FromIn<'a, String> for &'a str {
#[inline(always)]
fn from_in(value: String, allocator: &'a Allocator) -> Self {
crate::String::from_str_in(value.as_str(), allocator).into_bump_str()
allocator.alloc_str(value.as_str())
}
}

Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_allocator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ use std::{
ops::{Deref, DerefMut},
};

pub use bumpalo::collections::String;
use bumpalo::Bump;

mod address;
Expand All @@ -53,13 +52,15 @@ mod boxed;
mod clone_in;
mod convert;
pub mod hash_map;
pub mod string;
mod vec;

pub use address::{Address, GetAddress};
pub use boxed::Box;
pub use clone_in::CloneIn;
pub use convert::{FromIn, IntoIn};
pub use hash_map::HashMap;
pub use string::String;
pub use vec::Vec;

/// A bump-allocated memory arena based on [bumpalo].
Expand Down
249 changes: 249 additions & 0 deletions crates/oxc_allocator/src/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
//! Arena String.
//!
//! See [`String`] for more details.

// All methods which just delegate to `bumpalo::collections::String` methods are marked `#[inline(always)]`
#![expect(clippy::inline_always)]

use std::{
fmt::{self, Debug, Display},
hash::{Hash, Hasher},
mem::ManuallyDrop,
ops::{Deref, DerefMut},
};

use bumpalo::collections::String as BumpaloString;
use simdutf8::basic::from_utf8;
pub use simdutf8::basic::Utf8Error;

use crate::{Allocator, Vec};

/// Arena String.
///
/// UTF-8 encoded, growable string. Identical to [`std::string::String`] except that it stores
/// string contents in arena allocator.
///
/// This is a newtype around `bumpalo::collections::String`. The wrapped type's methods are
/// reachable through the `Deref` / `DerefMut` impls in this module.
///
/// `'alloc` is the lifetime of the borrow of the [`Allocator`] that the string was created in.
///
/// Ordering (`PartialOrd`, `Ord`) and `Eq` are derived, and so are based on the wrapped string.
/// `PartialEq`, `Hash`, `Debug` and `Display` are implemented by hand in this module,
/// in terms of the string's contents.
#[derive(PartialOrd, Eq, Ord)]
pub struct String<'alloc>(BumpaloString<'alloc>);

impl<'alloc> String<'alloc> {
    /// Create an empty [`String`] in the given arena.
    ///
    /// An empty `String` does not allocate an initial buffer, which makes this
    /// call very cheap. The flip side is that the buffer may have to be grown
    /// repeatedly once data is appended. When the final size is roughly known
    /// up front, prefer [`with_capacity_in`] to avoid those re-allocations.
    ///
    /// [`with_capacity_in`]: String::with_capacity_in
    #[inline(always)]
    pub fn new_in(allocator: &'alloc Allocator) -> String<'alloc> {
        let inner = BumpaloString::new_in(allocator);
        Self(inner)
    }

    /// Create an empty [`String`] whose buffer can already hold `capacity` bytes.
    ///
    /// A `String` keeps its data in an internal buffer. The length of that
    /// buffer is the string's capacity, which the `capacity` method reports.
    /// Reserving the buffer up front is useful when a lot of data is about to
    /// be appended, because it reduces the number of reallocations required.
    ///
    /// A `capacity` of `0` does not allocate, and behaves exactly like [`new_in`].
    ///
    /// [`new_in`]: String::new_in
    #[inline(always)]
    pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> String<'alloc> {
        let inner = BumpaloString::with_capacity_in(capacity, allocator);
        Self(inner)
    }

    /// Create a [`String`] in the arena, initialized with the contents of a string slice.
    ///
    /// # Examples
    ///
    /// ```
    /// use oxc_allocator::{Allocator, String};
    ///
    /// let allocator = Allocator::default();
    ///
    /// let s = String::from_str_in("hello", &allocator);
    /// assert_eq!(s, "hello");
    /// ```
    #[inline(always)]
    pub fn from_str_in(s: &str, allocator: &'alloc Allocator) -> String<'alloc> {
        let inner = BumpaloString::from_str_in(s, allocator);
        Self(inner)
    }

    /// Convert a `Vec<u8>` into a [`String`], validating that its bytes are UTF-8.
    ///
    /// # Errors
    /// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
    pub fn from_utf8(bytes: Vec<'alloc, u8>) -> Result<String<'alloc>, Utf8Error> {
        // Validate up front. `?` returns the validation error to the caller.
        from_utf8(&bytes)?;
        // SAFETY: Validation above succeeded, so `bytes` is a valid UTF-8 string
        Ok(unsafe { Self::from_utf8_unchecked(bytes) })
    }

    /// Convert a `Vec<u8>` into a [`String`] without validating that its bytes are UTF-8.
    ///
    /// The `Vec`'s buffer is taken over as-is. Its contents are not copied,
    /// so this is a zero-cost operation.
    ///
    /// # SAFETY
    /// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
    //
    // `#[inline(always)]` because this is a no-op at runtime
    #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
    #[inline(always)]
    pub unsafe fn from_utf8_unchecked(bytes: Vec<'alloc, u8>) -> String<'alloc> {
        // `bumpalo::String::from_utf8_unchecked` is not usable here: it expects
        // a `bumpalo::collections::Vec`, whereas our `Vec` wraps `allocator_api2::vec::Vec`.
        // So take the `Vec` apart and rebuild a string from its raw parts.
        // SAFETY: Conversion is safe because both types store data in arena in same way.
        // Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
        let raw_vec = ManuallyDrop::into_inner(bytes.0);
        let (data, length, cap, arena) = raw_vec.into_raw_parts_with_alloc();
        let inner = BumpaloString::from_raw_parts_in(data, length, cap, arena);
        Self(inner)
    }

    /// Build a [`String`] directly from a pointer, a length and a capacity.
    ///
    /// # SAFETY
    ///
    /// This is highly unsafe, because none of the following invariants are checked:
    ///
    /// * The memory at `ptr` needs to have been previously allocated by the same [`Allocator`].
    /// * `length` needs to be less than or equal to `capacity`.
    /// * `capacity` needs to be the correct value.
    ///
    /// Breaking any of them may cause problems such as corrupting the allocator's
    /// internal data structures.
    ///
    /// Ownership of `ptr` is effectively handed over to the returned `String`, which
    /// is then free to deallocate, reallocate, or overwrite the memory behind the
    /// pointer. Make sure nothing else uses the pointer after calling this function.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::mem;
    /// use oxc_allocator::{Allocator, String};
    ///
    /// let allocator = Allocator::default();
    ///
    /// unsafe {
    ///     let mut s = String::from_str_in("hello", &allocator);
    ///     let ptr = s.as_mut_ptr();
    ///     let len = s.len();
    ///     let capacity = s.capacity();
    ///
    ///     mem::forget(s);
    ///
    ///     let s = String::from_raw_parts_in(ptr, len, capacity, &allocator);
    ///
    ///     assert_eq!(s, "hello");
    /// }
    /// ```
    #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
    #[inline(always)]
    pub unsafe fn from_raw_parts_in(
        buf: *mut u8,
        length: usize,
        capacity: usize,
        allocator: &'alloc Allocator,
    ) -> String<'alloc> {
        // SAFETY: Safety conditions of this method are the same as `BumpaloString`'s method
        let inner = BumpaloString::from_raw_parts_in(buf, length, capacity, allocator);
        Self(inner)
    }

    /// Consume this `String<'alloc>` and return its contents as an `&'alloc str`.
    /// This is analogous to [`std::string::String::into_boxed_str`].
    ///
    /// # Example
    ///
    /// ```
    /// use oxc_allocator::{Allocator, String};
    ///
    /// let allocator = Allocator::default();
    ///
    /// let s = String::from_str_in("foo", &allocator);
    /// assert_eq!(s.into_bump_str(), "foo");
    /// ```
    #[inline(always)]
    pub fn into_bump_str(self) -> &'alloc str {
        let Self(inner) = self;
        inner.into_bump_str()
    }
}

// Provide access to all `bumpalo::String`'s methods via deref
impl<'alloc> Deref for String<'alloc> {
type Target = BumpaloString<'alloc>;

#[inline]
fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<'alloc> DerefMut for String<'alloc> {
#[inline]
fn deref_mut(&mut self) -> &mut BumpaloString<'alloc> {
&mut self.0
}
}

impl PartialEq for String<'_> {
    /// Two arena strings are equal when their contents are equal as `str`s.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}

// `impl_eq!` macro copied from `bumpalo`
macro_rules! impl_eq {
($lhs:ty, $rhs: ty) => {
impl<'a, 'alloc> PartialEq<$rhs> for $lhs {
#[inline]
fn eq(&self, other: &$rhs) -> bool {
PartialEq::eq(&self[..], &other[..])
}
}

impl<'a, 'alloc> PartialEq<$lhs> for $rhs {
#[inline]
fn eq(&self, other: &$lhs) -> bool {
PartialEq::eq(&self[..], &other[..])
}
}
};
}

impl_eq! { String<'alloc>, str }
impl_eq! { String<'alloc>, &'a str }
impl_eq! { std::borrow::Cow<'a, str>, String<'alloc> }
impl_eq! { std::string::String, String<'alloc> }

impl Display for String<'_> {
    /// Format exactly as the underlying `str` would be formatted.
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as Display>::fmt(self.as_str(), formatter)
    }
}

impl Debug for String<'_> {
    /// Debug-format exactly as the underlying `str` would be formatted.
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as Debug>::fmt(self.as_str(), formatter)
    }
}

impl Hash for String<'_> {
    /// Hash the string's contents as a `str`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(self.as_str(), state);
    }
}
21 changes: 6 additions & 15 deletions crates/oxc_allocator/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use allocator_api2::vec::Vec as InnerVec;
use bumpalo::Bump;
#[cfg(any(feature = "serialize", test))]
use serde::{ser::SerializeSeq, Serialize, Serializer};
use simdutf8::basic::{from_utf8, Utf8Error};
use simdutf8::basic::Utf8Error;

use crate::{Allocator, Box, String};

Expand All @@ -32,7 +32,7 @@ use crate::{Allocator, Box, String};
/// Note: This is not a soundness issue, as Rust does not support relying on `drop`
/// being called to guarantee soundness.
#[derive(PartialEq, Eq)]
pub struct Vec<'alloc, T>(ManuallyDrop<InnerVec<T, &'alloc Bump>>);
pub struct Vec<'alloc, T>(pub(crate) ManuallyDrop<InnerVec<T, &'alloc Bump>>);

/// SAFETY: Not actually safe, but for enabling `Send` for downstream crates.
unsafe impl<T> Send for Vec<'_, T> {}
Expand Down Expand Up @@ -190,16 +190,12 @@ impl<'alloc, T> Vec<'alloc, T> {
}

impl<'alloc> Vec<'alloc, u8> {
/// Convert `Vec<u8>` into `String`.
/// Convert `Vec<u8>` into [`String`].
///
/// # Errors
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
// Check vec comprises a valid UTF-8 string.
from_utf8(&self.0)?;
// SAFETY: We just checked it's a valid UTF-8 string
let s = unsafe { self.into_string_unchecked() };
Ok(s)
String::from_utf8(self)
}

/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
Expand All @@ -211,13 +207,8 @@ impl<'alloc> Vec<'alloc, u8> {
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
#[inline(always)] // `#[inline(always)]` because this is a no-op at runtime
pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
// SAFETY: Conversion is safe because both types store data in arena in same way.
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
let inner = ManuallyDrop::into_inner(self.0);
let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
String::from_raw_parts_in(ptr, len, cap, bump)
// SAFETY: Caller guarantees vec comprises a valid UTF-8 string.
String::from_utf8_unchecked(self)
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_ast/src/ast_builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

use std::{borrow::Cow, mem};

use oxc_allocator::{Allocator, Box, FromIn, String, Vec};
use oxc_allocator::{Allocator, Box, FromIn, Vec};
use oxc_span::{Atom, Span, SPAN};
use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId};

Expand Down Expand Up @@ -78,7 +78,7 @@ impl<'a> AstBuilder<'a> {
/// in the heap.
#[inline]
pub fn str(self, value: &str) -> &'a str {
String::from_str_in(value, self.allocator).into_bump_str()
self.allocator.alloc_str(value)
}

/// Allocate an [`Atom`] from a string slice.
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_prettier/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ macro_rules! text {
#[macro_export]
macro_rules! dynamic_text {
($p:ident, $str:expr) => {{
let s = oxc_allocator::String::from_str_in($str, $p.allocator).into_bump_str();
let s = $p.allocator.alloc_str($str);
$crate::ir::Doc::Str(s)
}};
}
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_span/src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl<'alloc> FromIn<'alloc, &Atom<'alloc>> for Atom<'alloc> {

impl<'alloc> FromIn<'alloc, &str> for Atom<'alloc> {
fn from_in(s: &str, allocator: &'alloc Allocator) -> Self {
Self::from(oxc_allocator::String::from_str_in(s, allocator))
Self::from(&*allocator.alloc_str(s))
}
}

Expand Down
Loading