From bdcbeb42ae13cf4d5f551fbce6f000eeaa04ea2a Mon Sep 17 00:00:00 2001 From: Dunqing <29533304+Dunqing@users.noreply.github.com> Date: Sun, 27 Apr 2025 02:39:10 +0000 Subject: [PATCH] perf(traverse): use `ArenaString` instead `CompactString` to store UID name (#10562) close: #10545 --- Cargo.lock | 1 - .../src/common/arrow_function_converter.rs | 9 ++- .../es2022/class_properties/constructor.rs | 4 +- crates/oxc_traverse/Cargo.toml | 1 - crates/oxc_traverse/src/context/mod.rs | 11 ++-- crates/oxc_traverse/src/context/scoping.rs | 58 +++++++++++-------- 6 files changed, 46 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 81eb7e9c67293..4f0e2f91841ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2267,7 +2267,6 @@ dependencies = [ name = "oxc_traverse" version = "0.66.0" dependencies = [ - "compact_str", "itoa", "oxc_allocator", "oxc_ast", diff --git a/crates/oxc_transformer/src/common/arrow_function_converter.rs b/crates/oxc_transformer/src/common/arrow_function_converter.rs index 8f1368ada0a2b..fce263c21ed98 100644 --- a/crates/oxc_transformer/src/common/arrow_function_converter.rs +++ b/crates/oxc_transformer/src/common/arrow_function_converter.rs @@ -1039,11 +1039,10 @@ impl<'a> ArrowFunctionConverter<'a> { let binding = self.arguments_var_stack.last_or_init(|| { if let Some(symbol_id) = symbol_id { let arguments_name = ctx.generate_uid_name("arguments"); - let arguments_name_atom = ctx.ast.atom(&arguments_name); - Self::rename_arguments_symbol(symbol_id, arguments_name, ctx); + Self::rename_arguments_symbol(symbol_id, arguments_name.into_compact_str(), ctx); // Record the symbol ID as a renamed `arguments` variable. self.renamed_arguments_symbol_ids.insert(symbol_id); - BoundIdentifier::new(arguments_name_atom, symbol_id) + BoundIdentifier::new(arguments_name, symbol_id) } else { // We cannot determine the final scope ID of the `arguments` variable insertion, // because the `arguments` variable will be inserted to a new scope which haven't been created yet, @@ -1081,9 +1080,9 @@ impl<'a> ArrowFunctionConverter<'a> { self.arguments_var_stack.last_or_init(|| { let arguments_name = ctx.generate_uid_name("arguments"); - ident.name = ctx.ast.atom(&arguments_name); + ident.name = arguments_name; let symbol_id = ident.symbol_id(); - Self::rename_arguments_symbol(symbol_id, arguments_name, ctx); + Self::rename_arguments_symbol(symbol_id, arguments_name.into_compact_str(), ctx); // Record the symbol ID as a renamed `arguments` variable. self.renamed_arguments_symbol_ids.insert(symbol_id); BoundIdentifier::new(ident.name, symbol_id) diff --git a/crates/oxc_transformer/src/es2022/class_properties/constructor.rs b/crates/oxc_transformer/src/es2022/class_properties/constructor.rs index 65a2c6831fab3..7db4e9f08e101 100644 --- a/crates/oxc_transformer/src/es2022/class_properties/constructor.rs +++ b/crates/oxc_transformer/src/es2022/class_properties/constructor.rs @@ -444,9 +444,9 @@ impl<'a> ClassProperties<'a, '_> { // Generate replacement UID name let new_name = ctx.generate_uid_name(name); // Save replacement name in `clashing_symbols` - *name = ctx.ast.atom(&new_name); + *name = new_name; // Rename symbol and binding - ctx.rename_symbol(symbol_id, constructor_scope_id, new_name); + ctx.rename_symbol(symbol_id, constructor_scope_id, new_name.into_compact_str()); } // Rename identifiers for clashing symbols in constructor params and body diff --git a/crates/oxc_traverse/Cargo.toml b/crates/oxc_traverse/Cargo.toml index 6542566ba6ea9..590ce7830ae80 100644 --- a/crates/oxc_traverse/Cargo.toml +++ b/crates/oxc_traverse/Cargo.toml @@ -32,6 +32,5 @@ oxc_semantic = { workspace = true } oxc_span = { workspace = true } oxc_syntax = { workspace = true } -compact_str = { workspace = true } itoa = { workspace = true } rustc-hash = { workspace = true } diff --git a/crates/oxc_traverse/src/context/mod.rs b/crates/oxc_traverse/src/context/mod.rs index 34265c90424f1..3d217c26cd426 100644 --- a/crates/oxc_traverse/src/context/mod.rs +++ b/crates/oxc_traverse/src/context/mod.rs @@ -114,7 +114,7 @@ pub use scoping::TraverseScoping; /// [`alloc`]: `TraverseCtx::alloc` pub struct TraverseCtx<'a> { pub ancestry: TraverseAncestry<'a>, - pub scoping: TraverseScoping, + pub scoping: TraverseScoping<'a>, pub ast: AstBuilder<'a>, } @@ -328,7 +328,7 @@ impl<'a> TraverseCtx<'a> { /// the resulting scopes will be: /// ```ts /// parentScope1: { - /// newScope: { + /// newScope: { /// childScope: { } /// } /// childScope2: { } @@ -396,8 +396,8 @@ impl<'a> TraverseCtx<'a> { /// There are some potential "gotchas". /// /// This is a shortcut for `ctx.scoping.generate_uid_name`. - pub fn generate_uid_name(&mut self, name: &str) -> CompactStr { - self.scoping.generate_uid_name(name) + pub fn generate_uid_name(&mut self, name: &str) -> Atom<'a> { + self.scoping.generate_uid_name(name, self.ast.allocator) } /// Generate UID. @@ -413,9 +413,8 @@ impl<'a> TraverseCtx<'a> { ) -> BoundIdentifier<'a> { // Get name for UID let name = self.generate_uid_name(name); - let name_atom = self.ast.atom(&name); let symbol_id = self.scoping.add_binding(&name, scope_id, flags); - BoundIdentifier::new(name_atom, symbol_id) + BoundIdentifier::new(name, symbol_id) } /// Generate UID in current scope. diff --git a/crates/oxc_traverse/src/context/scoping.rs b/crates/oxc_traverse/src/context/scoping.rs index 06e13454691f2..0c333b1096d16 100644 --- a/crates/oxc_traverse/src/context/scoping.rs +++ b/crates/oxc_traverse/src/context/scoping.rs @@ -1,10 +1,9 @@ use std::str; -use compact_str::CompactString; use itoa::Buffer as ItoaBuffer; use rustc_hash::FxHashSet; -use oxc_allocator::Vec as ArenaVec; +use oxc_allocator::{Allocator, String as ArenaString, Vec as ArenaVec}; use oxc_ast::ast::*; use oxc_ast_visit::Visit; use oxc_semantic::{NodeId, Reference, Scoping}; @@ -23,16 +22,16 @@ use crate::{BoundIdentifier, scopes_collector::ChildScopeCollector}; /// /// `current_scope_id` is the ID of current scope during traversal. /// `walk_*` functions update this field when entering/exiting a scope. -pub struct TraverseScoping { +pub struct TraverseScoping<'a> { scoping: Scoping, - uid_names: Option>, + uid_names: Option>, current_scope_id: ScopeId, current_hoist_scope_id: ScopeId, current_block_scope_id: ScopeId, } // Public methods -impl TraverseScoping { +impl<'a> TraverseScoping<'a> { /// Get current scope ID #[inline] pub fn current_scope_id(&self) -> ScopeId { @@ -257,7 +256,7 @@ impl TraverseScoping { /// Generate binding. /// /// Creates a symbol with the provided name and flags and adds it to the specified scope. - pub fn generate_binding<'a>( + pub fn generate_binding( &mut self, name: Atom<'a>, scope_id: ScopeId, @@ -270,7 +269,7 @@ impl TraverseScoping { /// Generate binding in current scope. /// /// Creates a symbol with the provided name and flags and adds it to the current scope. - pub fn generate_binding_in_current_scope<'a>( + pub fn generate_binding_in_current_scope( &mut self, name: Atom<'a>, flags: SymbolFlags, @@ -354,17 +353,17 @@ impl TraverseScoping { /// i.e. if source contains identifiers `_foo` and `__bar`, create UIDs names `___0`, `___1`, /// `___2` etc. They'll all be unique within the program. #[expect(clippy::missing_panics_doc)] - pub fn generate_uid_name(&mut self, name: &str) -> CompactStr { + pub fn generate_uid_name(&mut self, name: &str, allocator: &'a Allocator) -> Atom<'a> { // If `uid_names` is not already populated, initialize it if self.uid_names.is_none() { - self.uid_names = Some(self.get_uid_names()); + self.uid_names = Some(self.get_uid_names(allocator)); } let uid_names = self.uid_names.as_mut().unwrap(); let base = get_uid_name_base(name); - let uid = get_unique_name(base, uid_names); - uid_names.insert(uid.clone()); - uid + let uid = get_unique_name(base, uid_names, allocator).into_bump_str(); + uid_names.insert(uid); + Atom::from(uid) } /// Create a reference bound to a `SymbolId` @@ -449,7 +448,7 @@ impl TraverseScoping { } // Methods used internally within crate -impl TraverseScoping { +impl<'a> TraverseScoping<'a> { /// Create new `TraverseScoping` pub(super) fn new(scoping: Scoping) -> Self { Self { @@ -492,14 +491,14 @@ impl TraverseScoping { /// /// Once this set is created, generating a UID is a relatively quick operation, rather than /// iterating over all symbols and unresolved references every time generate a UID. - fn get_uid_names(&self) -> FxHashSet { + fn get_uid_names(&self, allocator: &'a Allocator) -> FxHashSet<&'a str> { self.scoping .root_unresolved_references() .keys() .copied() .chain(self.scoping.symbol_names()) .filter(|name| name.as_bytes().first() == Some(&b'_')) - .map(CompactStr::from) + .map(|str| allocator.alloc_str(str)) .collect() } @@ -526,8 +525,12 @@ fn get_uid_name_base(name: &str) -> &str { unsafe { str::from_utf8_unchecked(bytes) } } -fn get_unique_name(base: &str, uid_names: &FxHashSet) -> CompactStr { - CompactStr::from(get_unique_name_impl(base, uid_names)) +fn get_unique_name<'a>( + base: &str, + uid_names: &FxHashSet<&'a str>, + allocator: &'a Allocator, +) -> ArenaString<'a> { + get_unique_name_impl(base, uid_names, allocator) } // TODO: We could make this function more performant, especially when it checks a lot of names @@ -538,17 +541,21 @@ fn get_unique_name(base: &str, uid_names: &FxHashSet) -> CompactStr // we could calculate an "unfinished" hash not including the last block, and then just add the final // block to "finish" the hash on each iteration. With `FxHash` this would be straight line code and only // a few operations. -fn get_unique_name_impl(base: &str, uid_names: &FxHashSet) -> CompactString { - // Create `CompactString` prepending name with `_`, and with 1 byte excess capacity. +fn get_unique_name_impl<'a>( + base: &str, + uid_names: &FxHashSet<&'a str>, + allocator: &'a Allocator, +) -> ArenaString<'a> { + // Create `ArenaString` prepending name with `_`, and with 1 byte excess capacity. // The extra byte is to avoid reallocation if need to add a digit on the end later, // which will not be too uncommon. // Having to add 2 digits will be uncommon, so we don't allocate 2 extra bytes for 2 digits. - let mut name = CompactString::with_capacity(base.len() + 2); + let mut name = ArenaString::with_capacity_in(base.len() + 2, allocator); name.push('_'); name.push_str(base); // It's fairly common that UIDs may need a numerical postfix, so we try to keep string - // operations to a minimum for postfixes up to 99 - reusing a single `CompactString`, + // operations to a minimum for postfixes up to 99 - reusing a single `ArenaString`, // rather than generating a new string on each attempt. // For speed we manipulate the string as bytes. // Postfixes greater than 99 should be very uncommon, so don't bother optimizing. @@ -625,9 +632,13 @@ fn get_unique_name_impl(base: &str, uid_names: &FxHashSet) -> Compac let mut buffer = ItoaBuffer::new(); for n in 100..=u32::MAX { let digits = buffer.format(n); + /* // SAFETY: `base_len` is always shorter than current `name.len()`, on a UTF-8 char boundary, // and `name` contains at least `base_len` initialized bytes unsafe { name.set_len(base_len) }; + */ + // workaround for `set_len` does not exist in `ArenaString` + name.truncate(base_len); name.push_str(digits); if !uid_names.contains(name.as_str()) { return name; @@ -757,8 +768,9 @@ fn test_get_unique_name() { ), ]; + let allocator = Allocator::default(); for (used, name, expected) in cases { - let used = used.iter().map(|name| CompactStr::from(*name)).collect(); - assert_eq!(get_unique_name(name, &used), expected); + let used = used.iter().copied().collect::>(); + assert_eq!(get_unique_name(name, &used, &allocator), *expected); } }