From d826c6111f00670ff5f84eba58d6494c3c2e6355 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Fri, 2 May 2025 11:47:31 +0100 Subject: [PATCH 1/8] perf(transformer): add `debug` option --- .typos.toml | 1 + .../oxc_transformer/examples/transformer.rs | 1 + crates/oxc_transformer/src/context.rs | 3 + .../es2022/class_properties/computed_key.rs | 2 +- .../es2022/class_properties/private_field.rs | 2 +- .../class_properties/super_converter.rs | 6 +- crates/oxc_transformer/src/lib.rs | 8 +- crates/oxc_transformer/src/options/mod.rs | 5 + .../tests/integrations/es_target.rs | 6 +- crates/oxc_traverse/Cargo.toml | 2 +- .../src/ast_operations/gather_node_parts.rs | 6 +- crates/oxc_traverse/src/context/mod.rs | 8 +- crates/oxc_traverse/src/context/reusable.rs | 7 +- crates/oxc_traverse/src/context/scoping.rs | 20 +- crates/oxc_traverse/src/context/uid.rs | 548 ++++++++++++++++-- napi/playground/src/lib.rs | 3 +- napi/transform/index.d.ts | 2 + napi/transform/src/transformer.rs | 4 + napi/transform/test/transform.test.ts | 7 +- tasks/coverage/src/tools/semantic.rs | 1 + tasks/transform_conformance/src/test_case.rs | 5 +- 21 files changed, 567 insertions(+), 80 deletions(-) diff --git a/.typos.toml b/.typos.toml index 3342e35562872..3a5205fc6e94d 100644 --- a/.typos.toml +++ b/.typos.toml @@ -15,6 +15,7 @@ extend-exclude = [ "crates/oxc_linter/src/rules/react/no_unknown_property.rs", "crates/oxc_parser/src/lexer/byte_handlers.rs", "crates/oxc_syntax/src/xml_entities.rs", + "crates/oxc_traverse/src/context/uid.rs", "pnpm-lock.yaml", "tasks/coverage/babel", "tasks/coverage/test262", diff --git a/crates/oxc_transformer/examples/transformer.rs b/crates/oxc_transformer/examples/transformer.rs index 06f92581c11d5..e272a9030656c 100644 --- a/crates/oxc_transformer/examples/transformer.rs +++ b/crates/oxc_transformer/examples/transformer.rs @@ -72,6 +72,7 @@ fn main() { TransformOptions::enable_all() }; + transform_options.debug = true; transform_options.helper_loader.mode = 
HelperLoaderMode::External; let ret = Transformer::new(&allocator, path, &transform_options) diff --git a/crates/oxc_transformer/src/context.rs b/crates/oxc_transformer/src/context.rs index 6b5ec5e755ab1..f6b4a7c128f0b 100644 --- a/crates/oxc_transformer/src/context.rs +++ b/crates/oxc_transformer/src/context.rs @@ -33,6 +33,8 @@ pub struct TransformCtx<'a> { pub assumptions: CompilerAssumptions, + pub debug: bool, + // Helpers /// Manage helper loading pub helper_loader: HelperLoaderStore<'a>, @@ -60,6 +62,7 @@ impl TransformCtx<'_> { source_text: "", module: options.env.module, assumptions: options.assumptions, + debug: options.debug, helper_loader: HelperLoaderStore::new(&options.helper_loader), module_imports: ModuleImportsStore::new(), var_declarations: VarDeclarationsStore::new(), diff --git a/crates/oxc_transformer/src/es2022/class_properties/computed_key.rs b/crates/oxc_transformer/src/es2022/class_properties/computed_key.rs index b47780fc0cbcf..333523b880eb1 100644 --- a/crates/oxc_transformer/src/es2022/class_properties/computed_key.rs +++ b/crates/oxc_transformer/src/es2022/class_properties/computed_key.rs @@ -131,7 +131,7 @@ impl<'a> ClassProperties<'a, '_> { let AssignmentTarget::AssignmentTargetIdentifier(ident) = &assign_expr.left else { unreachable!(); }; - assert!(ident.name.starts_with('_')); + assert!(ident.name.starts_with(if self.ctx.debug { '_' } else { '$' })); assert!(ctx.scoping().get_reference(ident.reference_id()).symbol_id().is_some()); assert!(ident.span.is_empty()); assert!(prop.value.is_none()); diff --git a/crates/oxc_transformer/src/es2022/class_properties/private_field.rs b/crates/oxc_transformer/src/es2022/class_properties/private_field.rs index cd1870526d0d9..b5df2f05330cb 100644 --- a/crates/oxc_transformer/src/es2022/class_properties/private_field.rs +++ b/crates/oxc_transformer/src/es2022/class_properties/private_field.rs @@ -948,7 +948,7 @@ impl<'a> ClassProperties<'a, '_> { .. 
} = self.classes_stack.find_get_set_private_prop(&field_expr.field); - let temp_var_name_base = get_var_name_from_node(field_expr); + let temp_var_name_base = get_var_name_from_node(field_expr, self.ctx.debug); // TODO(improve-on-babel): Could avoid `move_expression` here and replace `update_expr.argument` instead. // Only doing this first to match the order Babel creates temp vars. diff --git a/crates/oxc_transformer/src/es2022/class_properties/super_converter.rs b/crates/oxc_transformer/src/es2022/class_properties/super_converter.rs index dfcc75db5ab48..df698b93c5f32 100644 --- a/crates/oxc_transformer/src/es2022/class_properties/super_converter.rs +++ b/crates/oxc_transformer/src/es2022/class_properties/super_converter.rs @@ -373,7 +373,8 @@ impl<'a> ClassPropertiesSuperConverter<'a, '_, '_> { unreachable!() }; - let temp_var_name_base = get_var_name_from_node(member.as_ref()); + let temp_var_name_base = + get_var_name_from_node(member.as_ref(), self.class_properties.ctx.debug); let property = ctx.ast.expression_string_literal(member.property.span, member.property.name, None); @@ -436,7 +437,8 @@ impl<'a> ClassPropertiesSuperConverter<'a, '_, '_> { unreachable!() }; - let temp_var_name_base = get_var_name_from_node(member.as_ref()); + let temp_var_name_base = + get_var_name_from_node(member.as_ref(), self.class_properties.ctx.debug); let property = member.expression.get_inner_expression_mut().take_in(ctx.ast.allocator); diff --git a/crates/oxc_transformer/src/lib.rs b/crates/oxc_transformer/src/lib.rs index df34094fd6be9..a08ad4796236e 100644 --- a/crates/oxc_transformer/src/lib.rs +++ b/crates/oxc_transformer/src/lib.rs @@ -12,7 +12,7 @@ use oxc_ast::{AstBuilder, ast::*}; use oxc_diagnostics::OxcDiagnostic; use oxc_semantic::Scoping; use oxc_span::SPAN; -use oxc_traverse::{Traverse, TraverseCtx, traverse_mut}; +use oxc_traverse::{ReusableTraverseCtx, Traverse, TraverseCtx, traverse_mut_with_ctx}; // Core mod common; @@ -157,7 +157,11 @@ impl<'a> Transformer<'a> 
{ x4_regexp: RegExp::new(self.env.regexp, &self.ctx), }; - let scoping = traverse_mut(&mut transformer, allocator, program, scoping); + let mut traverse_ctx = + ReusableTraverseCtx::new_with_debug(scoping, allocator, self.ctx.debug); + traverse_mut_with_ctx(&mut transformer, program, &mut traverse_ctx); + let scoping = traverse_ctx.into_scoping(); + let helpers_used = self.ctx.helper_loader.used_helpers.borrow_mut().drain().collect(); #[expect(deprecated)] TransformerReturn { errors: self.ctx.take_errors(), scoping, helpers_used } diff --git a/crates/oxc_transformer/src/options/mod.rs b/crates/oxc_transformer/src/options/mod.rs index f0c48c53a9def..bdf357c1c89cb 100644 --- a/crates/oxc_transformer/src/options/mod.rs +++ b/crates/oxc_transformer/src/options/mod.rs @@ -46,6 +46,9 @@ pub struct TransformOptions { /// The working directory that all paths in the programmatic options will be resolved relative to. pub cwd: PathBuf, + /// If `true`, produces code with inserted UIDs in a more easily debuggable form. + pub debug: bool, + // Core /// Set assumptions in order to produce smaller output. /// For more information, check the [assumptions](https://babel.dev/docs/assumptions) documentation page. 
@@ -80,6 +83,7 @@ impl TransformOptions { pub fn enable_all() -> Self { Self { cwd: PathBuf::new(), + debug: false, assumptions: CompilerAssumptions::default(), typescript: TypeScriptOptions::default(), decorator: DecoratorOptions { legacy: true, emit_decorator_metadata: true }, @@ -260,6 +264,7 @@ impl TryFrom<&BabelOptions> for TransformOptions { Ok(Self { cwd: options.cwd.clone().unwrap_or_default(), + debug: false, assumptions: options.assumptions, typescript, decorator, diff --git a/crates/oxc_transformer/tests/integrations/es_target.rs b/crates/oxc_transformer/tests/integrations/es_target.rs index 7e39b59a7718f..9da4a19f2f7da 100644 --- a/crates/oxc_transformer/tests/integrations/es_target.rs +++ b/crates/oxc_transformer/tests/integrations/es_target.rs @@ -25,7 +25,8 @@ fn es_target() { ]; // Test no transformation for esnext. - let options = TransformOptions::from(ESTarget::from_str("esnext").unwrap()); + let mut options = TransformOptions::from(ESTarget::from_str("esnext").unwrap()); + options.debug = true; for (_, case) in cases { assert_eq!(test(case, &options), Ok(codegen(case, SourceType::mjs()))); } @@ -33,7 +34,8 @@ fn es_target() { #[cfg_attr(miri, expect(unused_variables))] let snapshot = cases.into_iter().enumerate().fold(String::new(), |mut w, (i, (target, case))| { - let options = TransformOptions::from_target(target).unwrap(); + let mut options = TransformOptions::from_target(target).unwrap(); + options.debug = true; let result = match test(case, &options) { Ok(code) => code, Err(errors) => errors diff --git a/crates/oxc_traverse/Cargo.toml b/crates/oxc_traverse/Cargo.toml index ad3e52b5a5ddd..3b3eafa57dc7d 100644 --- a/crates/oxc_traverse/Cargo.toml +++ b/crates/oxc_traverse/Cargo.toml @@ -26,7 +26,7 @@ doctest = true oxc_allocator = { workspace = true } oxc_ast = { workspace = true } oxc_ast_visit = { workspace = true } -oxc_data_structures = { workspace = true, features = ["stack"] } +oxc_data_structures = { workspace = true, features = 
["assert_unchecked", "stack"] } oxc_ecmascript = { workspace = true } oxc_semantic = { workspace = true } oxc_span = { workspace = true } diff --git a/crates/oxc_traverse/src/ast_operations/gather_node_parts.rs b/crates/oxc_traverse/src/ast_operations/gather_node_parts.rs index 20c8c978a234c..e5adc228a289f 100644 --- a/crates/oxc_traverse/src/ast_operations/gather_node_parts.rs +++ b/crates/oxc_traverse/src/ast_operations/gather_node_parts.rs @@ -9,7 +9,11 @@ use oxc_ecmascript::BoundNames; use super::to_identifier; -pub fn get_var_name_from_node<'a, N: GatherNodeParts<'a>>(node: &N) -> String { +pub fn get_var_name_from_node<'a, N: GatherNodeParts<'a>>(node: &N, debug: bool) -> String { + if !debug { + return String::new(); + } + let mut name = String::new(); node.gather(&mut |mut part| { if name.is_empty() { diff --git a/crates/oxc_traverse/src/context/mod.rs b/crates/oxc_traverse/src/context/mod.rs index 99166464c1010..29f4d5047bfe3 100644 --- a/crates/oxc_traverse/src/context/mod.rs +++ b/crates/oxc_traverse/src/context/mod.rs @@ -457,7 +457,7 @@ impl<'a> TraverseCtx<'a> { scope_id: ScopeId, flags: SymbolFlags, ) -> BoundIdentifier<'a> { - let name = get_var_name_from_node(node); + let name = get_var_name_from_node(node, self.scoping.debug); self.generate_uid(&name, scope_id, flags) } @@ -492,7 +492,7 @@ impl<'a> TraverseCtx<'a> { &mut self, node: &N, ) -> BoundIdentifier<'a> { - let name = get_var_name_from_node(node); + let name = get_var_name_from_node(node, self.scoping.debug); self.generate_uid_in_current_hoist_scope(&name) } @@ -649,9 +649,9 @@ impl<'a> TraverseCtx<'a> { /// /// # SAFETY /// This function must not be public to maintain soundness of [`TraverseAncestry`]. 
- pub(crate) fn new(scoping: Scoping, allocator: &'a Allocator) -> Self { + pub(crate) fn new(scoping: Scoping, allocator: &'a Allocator, debug: bool) -> Self { let ancestry = TraverseAncestry::new(); - let scoping = TraverseScoping::new(scoping); + let scoping = TraverseScoping::new(scoping, debug); let ast = AstBuilder::new(allocator); Self { ancestry, scoping, ast } } diff --git a/crates/oxc_traverse/src/context/reusable.rs b/crates/oxc_traverse/src/context/reusable.rs index 81648531c03a3..b1396a035f357 100644 --- a/crates/oxc_traverse/src/context/reusable.rs +++ b/crates/oxc_traverse/src/context/reusable.rs @@ -21,7 +21,12 @@ pub struct ReusableTraverseCtx<'a>(TraverseCtx<'a>); impl<'a> ReusableTraverseCtx<'a> { /// Create new [`ReusableTraverseCtx`]. pub fn new(scoping: Scoping, allocator: &'a Allocator) -> Self { - Self(TraverseCtx::new(scoping, allocator)) + Self(TraverseCtx::new(scoping, allocator, false)) + } + + /// Create new [`ReusableTraverseCtx`] with `debug` flag. + pub fn new_with_debug(scoping: Scoping, allocator: &'a Allocator, debug: bool) -> Self { + Self(TraverseCtx::new(scoping, allocator, debug)) } /// Consume [`ReusableTraverseCtx`] and return [`Scoping`]. diff --git a/crates/oxc_traverse/src/context/scoping.rs b/crates/oxc_traverse/src/context/scoping.rs index f3d414544e166..855c537de2a81 100644 --- a/crates/oxc_traverse/src/context/scoping.rs +++ b/crates/oxc_traverse/src/context/scoping.rs @@ -24,6 +24,7 @@ use super::uid::UidGenerator; pub struct TraverseScoping<'a> { scoping: Scoping, uid_generator: Option<UidGenerator<'a>>, + pub(super) debug: bool, current_scope_id: ScopeId, current_hoist_scope_id: ScopeId, current_block_scope_id: ScopeId, @@ -286,9 +287,21 @@ impl<'a> TraverseScoping<'a> { /// /// See comments on `UidGenerator` for further details. 
pub fn generate_uid_name(&mut self, name: &str, allocator: &'a Allocator) -> Atom<'a> { + if let Some(UidGenerator::Fast(uid_generator)) = &mut self.uid_generator { + uid_generator.create() + } else { + // `debug: false` is default and `FastUidGenerator` is only created once, so cold path + self.generate_uid_name_slow(name, allocator) + } + } + + #[cold] + #[inline(never)] + fn generate_uid_name_slow(&mut self, name: &str, allocator: &'a Allocator) -> Atom<'a> { // If `uid_generator` is not already populated, initialize it - let uid_generator = - self.uid_generator.get_or_insert_with(|| UidGenerator::new(&self.scoping, allocator)); + let uid_generator = self + .uid_generator + .get_or_insert_with(|| UidGenerator::new(self.debug, &self.scoping, allocator)); // Generate unique name uid_generator.create(name) } @@ -361,10 +374,11 @@ impl<'a> TraverseScoping<'a> { // Methods used internally within crate impl TraverseScoping<'_> { /// Create new `TraverseScoping` - pub(super) fn new(scoping: Scoping) -> Self { + pub(super) fn new(scoping: Scoping, debug: bool) -> Self { Self { scoping, uid_generator: None, + debug, // Dummy values. Both immediately overwritten in `walk_program`. 
current_scope_id: ScopeId::new(0), current_hoist_scope_id: ScopeId::new(0), diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index 58fb968ede694..d6cb7ac199b7c 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -1,18 +1,343 @@ -use std::{iter, str}; +use std::{cmp, iter, slice, str}; use itoa::Buffer as ItoaBuffer; use rustc_hash::FxHashMap; use oxc_allocator::{Allocator, String as ArenaString}; +use oxc_data_structures::assert_unchecked; use oxc_semantic::Scoping; use oxc_span::Atom; +/// Number of characters in range `a-z` or `A-Z` required to produce at least `u32::MAX` unique combinations +const POSTFIX_BYTES: usize = 6; +const _: () = { + #[expect(clippy::cast_possible_truncation)] + let max_combinations = 52u64.pow(POSTFIX_BYTES as u32); + assert!(max_combinations >= u32::MAX as u64); +}; + /// Unique identifier generator. /// -/// When initialized with [`UidGenerator::new`], creates a catalog of all symbols and unresolved references +/// Can be either [`FastUidGenerator`] or [`DebugUidGenerator`], +/// depending on `debug` param passed to [`UidGenerator::new`]. +#[expect(private_interfaces)] +pub enum UidGenerator<'a> { + Fast(FastUidGenerator<'a>), + Debug(DebugUidGenerator<'a>), +} + +impl<'a> UidGenerator<'a> { + /// Create [`UidGenerator`]. + /// + /// * If `debug` is `false`, returns a fast generator which produces UIDs of form `$a`, `$b` etc. + /// * If `debug` is `true`, returns a slower generator which produces UIDs better for debugging. + pub(super) fn new(debug: bool, scoping: &Scoping, allocator: &'a Allocator) -> Self { + if debug { + Self::Debug(DebugUidGenerator::new(scoping, allocator)) + } else { + Self::Fast(FastUidGenerator::new(scoping, allocator)) + } + } + + /// Create a unique identifier. + /// + /// This method will never return the same UID twice. + /// + /// The form of the UID depends on value of `debug` passed to [`UidGenerator::new`]. 
+ /// + /// For more details, see: + /// + /// * [`FastUidGenerator::create`] + /// * [`DebugUidGenerator::create`] + pub(super) fn create(&mut self, name: &str) -> Atom<'a> { + match self { + Self::Fast(generator) => generator.create(), + Self::Debug(generator) => generator.create(name), + } + } +} + +/// Unique identifier generator which produces short var names, using a fast algorithm. +/// +/// [`FastUidGenerator::new`] searches all symbols and unresolved references in AST for those that +/// begin with `$`. It finds the longest `$` prefix. +/// +/// [`FastUidGenerator::create`] uses that information to generate a unique identifier which does not +/// clash with any existing name. +/// +/// Generated UIDs are `$a`, `$b`, ... `$z`, `$A`, `$B`, ... `$Z`, `$aa`, `$ab`, ... +/// +/// If AST already contains a symbol that begins with `$`, generated UIDs are `$$a`, `$$b`, etc. +/// If AST contains a symbol with a longer `$` prefix, generated UIDs are prefixed with 1 more `$` +/// than the longest. +/// e.g. existing symbol `$$$foo` -> UIDs `$$$$a`, `$$$$b`, etc. +/// In practice, long prefixes should be very rare. +/// +/// `$` is used as the prefix instead of `_`, because it's rare that JS code uses `$` in identifiers, +/// so makes it less likely that a long prefix is required. +/// +/// # Implementation details +/// +/// `FastUidGenerator` owns a small string buffer. +/// +/// Buffer starts as "$$$$$$`". +/// When generating a UID, the last byte is incremented. +/// i.e. "$$$$$$`" -> `$$$$$$a` -> `$$$$$$b` -> `$$$$$$c`. 
+/// +/// All the pointers stored in the type point to different places in that buffer: +/// +/// ```no_compile +/// $$$$abc +/// ^ `buffer_start_ptr` +/// ^ `active_ptr` +/// ^ `first_letter_ptr` +/// ^ `last_letter_ptr` +/// ``` +/// +/// "Active" part of the buffer is the section which is used as UID: +/// ```no_compile +/// Buffer: $$$$$$a +/// Active: ^^ +/// ``` +/// +/// 52nd UID is `$Z`, after which the UID grows in length to `$aa` ("rollover"). +/// The active part of the buffer expands in place: +/// ```no_compile +/// Buffer: $$$$$aa +/// Active: ^^^ +/// ``` +/// +/// This in place expansion means the buffer never has to reallocate. +/// +/// Using a pre-built string which is manually mutated (usually requiring just incrementing the last byte) +/// is more efficient than a `u32` counter which is converted to a string on each call to +/// [`FastUidGenerator::create`]. +/// +/// Using pointers to access the buffer makes the fast path for generating a UID (last byte is not `Z`, +/// so no "rollover" required) as cheap as possible - only a handful of instructions. +pub struct FastUidGenerator<'a> { + /// Pointer to start of buffer + buffer_start_ptr: *mut u8, + /// Pointer to start of active string in buffer + active_ptr: *const u8, + /// Pointer to first letter in buffer (excluding preceding `$`s) + first_letter_ptr: *const u8, + /// Pointer to last letter in buffer + last_letter_ptr: *mut u8, + /// Allocator + allocator: &'a Allocator, +} + +impl<'a> FastUidGenerator<'a> { + /// Create [`FastUidGenerator`]. 
+ fn new(scoping: &Scoping, allocator: &'a Allocator) -> Self { + // Find the symbol or unresolved references with maximum number of `$`s on start + let mut dollar_count = 0; + let mut update_dollar_count = |name: &str| { + let this_dollar_count = + name.as_bytes().iter().position(|&b| b != b'$').unwrap_or(name.len()); + dollar_count = cmp::max(dollar_count, this_dollar_count); + }; + + for name in scoping.symbol_names() { + update_dollar_count(name); + } + for &name in scoping.root_unresolved_references().keys() { + update_dollar_count(name); + } + + // We will prefix UIDs with 1 more `$` than the longest `$` prefix in existing symbols + dollar_count += 1; + + // Create a buffer large enough to contain all possible UID names. + // Fill it with `$`s and a final "`". + // If `dollar_count` is 1 (no symbols found starting with a `$`), + // buffer contains "$$$$$$`" (7 bytes). + // If the maximum number of UIDs are created, buffer will end up containing + // `$ZZZZZZ` (also 7 bytes). + // If an existing symbol was found which starts with `$$`, buffer needs to be longer. + // Buffer will contain "$$$$$$$$`" (9 bytes). Maximum UID is `$$$ZZZZZZ` (also 9 bytes). + let len = dollar_count + POSTFIX_BYTES; + let mut buffer = String::with_capacity(len); + buffer.extend(iter::repeat_n('$', len - 1)); + buffer.push('`'); // "`" is the character before `a` + let buffer = buffer.into_boxed_str(); + + // Convert `Box` to pointer. + // We can't hold onto the `Box` because `Box` contains a `Unique` pointer and we want + // to access `buffer`'s data via raw pointers. + let buffer_start_ptr = Box::into_raw(buffer).cast::<u8>(); + + // Get pointer to last byte in `buffer` (which is currently "`"). + // SAFETY: `buffer` is `len` bytes long, and `len > 0`, so `len - 1` cannot be out of bounds. + let last_letter_ptr = unsafe { buffer_start_ptr.add(len - 1) }; + + // Get pointer to start of active string in `buffer`. 
+ // If `dollar_count` is 1 (no symbols found starting with a `$`), active string is "$`". + // If `dollar_count` is 3 (symbol found starting with `$$`), active string is "$$$`". + // SAFETY: `last_letter_ptr` points to last byte in `buffer`. + // `buffer`'s length is `dollar_count + POSTFIX_BYTES`, and `POSTFIX_BYTES > 0`, + // so `last_letter_ptr - dollar_count` cannot be out of bounds of `buffer`. + let active_ptr = unsafe { last_letter_ptr.sub(dollar_count) }; + + Self { + buffer_start_ptr, + active_ptr, + first_letter_ptr: last_letter_ptr, + last_letter_ptr, + allocator, + } + } + + /// Create a unique identifier. + /// + /// UID will be of the form `$a`, with a sufficient number of dollars on start to avoid clash + /// with any existing var names. + /// + /// This method will never return the same UID twice. + #[inline] // `#[inline]` to inline into `TraverseCtx::generate_uid_name` + pub(super) fn create(&mut self) -> Atom<'a> { + // SAFETY: `last_letter_ptr` points to last byte of the buffer. + // All bytes of the buffer are initialized. No other references to buffer exist. + let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; + if (*last_letter | 32) < b'z' { + // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` or "`" + *last_letter += 1; + } else if *last_letter == b'z' { + *last_letter = b'A'; + } else { + debug_assert_eq!(*last_letter, b'Z'); + return self.rollover(); + } + + self.get_active() + } + + /// Create UID when last letter is `Z`, so the previous letter needs to be incremented. + /// + /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 52 UIDs. 
+ #[cold] + #[inline(never)] + fn rollover(&mut self) -> Atom<'a> { + self.rollover_update(); + self.get_active() + } + + fn rollover_update(&mut self) { + let mut letter_ptr = self.last_letter_ptr; + + // SAFETY: `letter_ptr` starts pointing to last byte of buffer, and loop exits if it gets to + // `first_letter_ptr`, which also points to within buffer. So `letter_ptr` remains in bounds. + // All bytes in buffer are initialized, so reading any byte is valid. + unsafe { + loop { + // Set letter to `a` + let letter = letter_ptr.as_mut().unwrap_unchecked(); + *letter = b'a'; + + // If this is first letter, we need to add an extra letter + if letter_ptr.cast_const() == self.first_letter_ptr { + break; + } + + // Move back to previous letter + letter_ptr = letter_ptr.sub(1); + + // Increment letter + let letter = letter_ptr.as_mut().unwrap_unchecked(); + if (*letter | 32) < b'z' { + // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` + *letter += 1; + return; + } + if *letter == b'z' { + *letter = b'A'; + return; + } + + // Letter is `Z`. Need to change it to `a` and increment previous letter + debug_assert_eq!(*letter, b'Z'); + } + } + + // SAFETY: Loop above exited with `letter_ptr == first_letter_ptr`. + // There is always at least 1 `$` before 1st letter, so subtracting 1 is in bounds of buffer. + letter_ptr = unsafe { letter_ptr.sub(1) }; + // SAFETY: All bytes of buffer are initialized + let letter = unsafe { letter_ptr.as_mut().unwrap_unchecked() }; + debug_assert_eq!(*letter, b'$'); + + // We can only create a maximum of `POSTFIX_BYTES` letters. + // SAFETY: Buffer is originally created with length at least `POSTFIX_BYTES + 1`. + // `last_letter_ptr` points to the last byte so subtracting `POSTFIX_BYTES - 1` is in bounds. 
+ let earliest_letter_ptr = unsafe { self.last_letter_ptr.sub(POSTFIX_BYTES - 1) }; + assert!(letter_ptr.cast_const() >= earliest_letter_ptr, "Created too many UIDs"); + + // Add another `a` on start (loop above has already converted all existing letters to `a`). + // So we started with `$ZZ` and now end up with `$aaa`. + *letter = b'a'; + + // Update pointer to first letter + self.first_letter_ptr = letter_ptr; + + // Extend active string backwards by 1 byte. + // SAFETY: Buffer is created with length `POSTFIX_BYTES + dollar_count`. + // `active_ptr` is `dollar_count` less than the first letter. + // We just increased number of letters by 1, and checked new number of letters does not + // exceed `POSTFIX_BYTES`, so `active_ptr - 1` cannot be before start of buffer. + self.active_ptr = unsafe { self.active_ptr.sub(1) }; + } + + /// Get the active string (current UID) and allocate into arena. Return UID as an [`Atom`]. + // + // `#[inline(always)]` to inline into `create`, to keep the path for no rollover as fast as possible + #[expect(clippy::inline_always)] + #[inline(always)] + fn get_active(&self) -> Atom<'a> { + // SAFETY: `active_ptr` points within buffer. `last_letter_ptr + 1` is end of buffer. + // The distance between the two is at least 2 bytes. + // All bytes in buffer are initialized. + // Buffer contains only ASCII bytes, so any slice of it is a valid UTF-8 string. + let uid = unsafe { + let end_ptr = self.last_letter_ptr.add(1).cast_const(); + assert_unchecked!(end_ptr > self.active_ptr); + #[expect(clippy::cast_sign_loss)] + let len = end_ptr.offset_from(self.active_ptr) as usize; + let slice = slice::from_raw_parts(self.active_ptr, len); + str::from_utf8_unchecked(slice) + }; + Atom::from(self.allocator.alloc_str(uid)) + } +} + +impl Drop for FastUidGenerator<'_> { + fn drop(&mut self) { + // Reconstitute the original `Box` created in `new`, and drop it. + // SAFETY: + // `buffer_start_ptr` points to start of the buffer. 
+ // `last_letter_ptr` points to last byte of the buffer. + // So a slice from `buffer_start_ptr` to `last_letter_ptr + 1` is the whole buffer. + // All bytes in buffer are initialized, and buffer contains only ASCII bytes, + // so is a valid UTF-8 string. + // No other references to buffer exist, so safe to give ownership of it to a `Box`. + unsafe { + let end_ptr = self.last_letter_ptr.add(1); + assert_unchecked!(end_ptr > self.buffer_start_ptr); + #[expect(clippy::cast_sign_loss)] + let len = end_ptr.offset_from(self.buffer_start_ptr) as usize; + let slice = slice::from_raw_parts_mut(self.buffer_start_ptr, len); + let str = str::from_utf8_unchecked_mut(slice); + let _box = Box::from_raw(str); + } + } +} + +/// Unique identifier generator which produces debug-friendly variable names. +/// +/// When initialized with [`DebugUidGenerator::new`], creates a catalog of all symbols and unresolved references /// in the AST which begin with `_`. /// -/// [`UidGenerator::create`] uses that catalog to generate a unique identifier which does not clash with +/// [`DebugUidGenerator::create`] uses that catalog to generate a unique identifier which does not clash with /// any existing name. /// /// Such UIDs are based on the base name provided. They start with `_` and end with digits if required to @@ -69,54 +394,7 @@ use oxc_span::Atom; /// /// 5. Uses a slightly different algorithm for generating names (see above). /// The resulting UIDs are similar enough to Babel's algorithm to fail only 1 of Babel's tests. -/// -/// # Potential improvements -/// -/// TODO(improve-on-babel): -/// -/// UID generation is fairly expensive, because of the amount of string hashing required. -/// -/// [`UidGenerator::new`] iterates through every binding and unresolved reference in the entire AST, -/// and builds a hashmap of symbols which could clash with UIDs. -/// Once that's built, [`UidGenerator::create`] has to do at a hashmap lookup when generating each UID. 
-/// Hashing strings is a fairly expensive operation. -/// -/// We could improve this in one of 3 ways: -/// -/// ## 1. Build the hashmap in `SemanticBuilder` -/// -/// Instead of iterating through all symbols again here. -/// -/// ## 2. Use a simpler algorithm -/// -/// * During initial semantic pass, check for any existing identifiers starting with `_`. -/// * Calculate what is the highest postfix number on `_...` identifiers (e.g. `_foo1`, `_bar8`). -/// * Store that highest number in a counter which is global across the whole program. -/// * When creating a UID, increment the counter, and make the UID `_<name><counter>`. -/// -/// i.e. if source contains identifiers `_foo1` and `_bar15`, create UIDs named `_qux16`, -/// `_temp17` etc. They'll all be unique within the program. -/// -/// Minimal cost in semantic, and generating UIDs extremely cheap. -/// -/// The resulting UIDs would still be fairly readable. -/// -/// This is a different method from Babel, and unfortunately produces UID names -/// which differ from Babel for some of its test cases. -/// -/// ## 3. Even simpler algorithm, but produces hard-to-read code -/// -/// If output is being minified anyway, use a method which produces less debuggable output, -/// but is even simpler: -/// -/// * During initial semantic pass, check for any existing identifiers starting with `_`. -/// * Find the highest number of leading `_`s for any existing symbol. -/// * Generate UIDs with a counter starting at 0, prefixed with number of `_`s one greater than -/// what was found in AST. -/// -/// i.e. if source contains identifiers `_foo` and `__bar`, create UIDs names `___0`, `___1`, -/// `___2` etc. They'll all be unique within the program. -pub struct UidGenerator<'a> { +struct DebugUidGenerator<'a> { names: FxHashMap<&'a str, UidName>, allocator: &'a Allocator, } @@ -135,9 +413,9 @@ struct UidName { underscore_count: u32, } -impl<'a> UidGenerator<'a> { - /// Create [`UidGenerator`]. 
- pub(super) fn new(scoping: &Scoping, allocator: &'a Allocator) -> Self { +impl<'a> DebugUidGenerator<'a> { + /// Create [`DebugUidGenerator`]. + fn new(scoping: &Scoping, allocator: &'a Allocator) -> Self { let mut generator = Self { names: FxHashMap::default(), allocator }; for name in scoping.symbol_names() { @@ -150,7 +428,7 @@ impl<'a> UidGenerator<'a> { generator } - /// Add a record to [`UidGenerator`]. + /// Add a record to [`DebugUidGenerator`]. fn add(&mut self, name: &str) { // If `name` does not start with `_`, exit if name.as_bytes().first() != Some(&b'_') { @@ -244,8 +522,8 @@ impl<'a> UidGenerator<'a> { /// The fact that a `_` will be prepended on start means providing an empty string or a string /// starting with a digit (0-9) is fine. /// - /// Please see docs for [`UidGenerator`] for further info. - pub(super) fn create(&mut self, name: &str) -> Atom<'a> { + /// Please see docs for [`DebugUidGenerator`] for further info. + fn create(&mut self, name: &str) -> Atom<'a> { // Get the base name, with `_`s trimmed from start, and digits trimmed from end. // i.e. `__foo123` -> `foo`. // Equivalent to `name.trim_start_matches('_').trim_end_matches(|c: char| c.is_ascii_digit())` @@ -311,7 +589,158 @@ impl<'a> UidGenerator<'a> { #[cfg(test)] #[test] -fn uids() { +fn fast_uids() { + use oxc_span::SPAN; + use oxc_syntax::{node::NodeId, scope::ScopeId, symbol::SymbolFlags}; + + // (&[ initial, ... ], &[ expected_uid, ... 
]) + #[rustfmt::skip] + let cases: &[(&[&str], &[&str])] = &[ + ( + &[], + &[ + "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", + "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", + "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", + "$N", "$O", "$P", "$Q", "$R", "$S", "$T", "$U", "$V", "$W", "$X", "$Y", "$Z", + "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", + "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", + "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", + "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", + "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", + "$bn", "$bo", "$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", + "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", + "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", + "$ca", + ], + ), + ( + &["foo", "bar$", "_$qux"], + &[ + "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", + "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", + "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", + "$N", "$O", "$P", "$Q", "$R", "$S", "$T", "$U", "$V", "$W", "$X", "$Y", "$Z", + "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", + "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", + "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", + "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", + "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", + "$bn", "$bo", 
"$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", + "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", + "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", + "$ca", + ], + ), + ( + &["$"], + &[ + "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", + "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", + "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", + "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", + "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", "$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", + "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", + "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", + "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", + "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", + "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", + "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", + "$$bN", "$$bO", "$$bP", "$$bQ", "$$bR", "$$bS", "$$bT", "$$bU", "$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", + "$$ca", + ], + ), + ( + &["$foo"], + &[ + "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", + "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", + "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", + "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", + "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", 
"$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", + "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", + "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", + "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", + "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", + "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", + "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", + "$$bN", "$$bO", "$$bP", "$$bQ", "$$bR", "$$bS", "$$bT", "$$bU", "$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", + "$$ca", + ], + ), + ( + &["$$$"], + &[ + "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", + "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", + "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", + "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", + "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", + "$$$$bn", "$$$$bo", "$$$$bp", 
"$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", + "$$$$ca", + ], + ), + ( + &["$$$foo"], + &[ + "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", + "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", + "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", + "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", + "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", + "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", + "$$$$ca", + ], + ), + ( + &["$$$foo", "$a"], + &[ + 
"$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", + "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", + "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", + "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", + "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", + "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", + "$$$$ca", + ], + ), + ]; + + let allocator = Allocator::default(); + for &(used_names, created) in cases { + let mut scoping = Scoping::default(); + for &name in used_names { + scoping.create_symbol(SPAN, name, SymbolFlags::empty(), ScopeId::new(0), NodeId::DUMMY); + } + + let mut generator = FastUidGenerator::new(&scoping, &allocator); + for &expected_uid in created { + assert_eq!(generator.create(), expected_uid); + } + } +} + +#[cfg(test)] +#[test] +fn 
debug_uids() { // (&[ initial, ... ], &[ (name, expected_uid), ... ]) #[expect(clippy::type_complexity)] let cases: &[(&[&str], &[(&str, &str)])] = &[ @@ -364,7 +793,8 @@ fn uids() { let allocator = Allocator::default(); for &(used_names, created) in cases { - let mut generator = UidGenerator { names: FxHashMap::default(), allocator: &allocator }; + let mut generator = + DebugUidGenerator { names: FxHashMap::default(), allocator: &allocator }; for &used_name in used_names { generator.add(used_name); } diff --git a/napi/playground/src/lib.rs b/napi/playground/src/lib.rs index 2f15d0b58d3be..324759e376a4b 100644 --- a/napi/playground/src/lib.rs +++ b/napi/playground/src/lib.rs @@ -189,7 +189,7 @@ impl Oxc { return Ok(()); } - let options = transform_options + let mut options = transform_options .target .as_ref() .and_then(|target| { @@ -200,6 +200,7 @@ impl Oxc { .ok() }) .unwrap_or_default(); + options.debug = true; let result = Transformer::new(&allocator, &path, &options) .build_with_scoping(scoping, &mut program); if !result.errors.is_empty() { diff --git a/napi/transform/index.d.ts b/napi/transform/index.d.ts index d04d6df0a5f59..0660246f1d496 100644 --- a/napi/transform/index.d.ts +++ b/napi/transform/index.d.ts @@ -369,6 +369,8 @@ export interface TransformOptions { * options. */ cwd?: string + /** If `true` produces more debuggable output */ + debug?: boolean /** * Enable source map generation. * diff --git a/napi/transform/src/transformer.rs b/napi/transform/src/transformer.rs index fe89272c9534e..914cfa9d7d450 100644 --- a/napi/transform/src/transformer.rs +++ b/napi/transform/src/transformer.rs @@ -97,6 +97,9 @@ pub struct TransformOptions { /// options. pub cwd: Option, + /// If `true` produces more debuggable output + pub debug: Option, + /// Enable source map generation. /// /// When `true`, the `sourceMap` field of transform result objects will be populated. 
@@ -156,6 +159,7 @@ impl TryFrom for oxc::transformer::TransformOptions { }; Ok(Self { cwd: options.cwd.map(PathBuf::from).unwrap_or_default(), + debug: options.debug.is_some_and(|debug| debug), assumptions: options.assumptions.map(Into::into).unwrap_or_default(), typescript: options .typescript diff --git a/napi/transform/test/transform.test.ts b/napi/transform/test/transform.test.ts index 82a5a9231c12e..08870a35b11f2 100644 --- a/napi/transform/test/transform.test.ts +++ b/napi/transform/test/transform.test.ts @@ -1,7 +1,12 @@ import { Worker } from 'node:worker_threads'; import { describe, expect, it, test } from 'vitest'; -import { HelperMode, transform } from '../index'; +import { HelperMode, transform as transformOriginal } from '../index'; + +function transform(filename, code, ...args) { + const options = { debug: true, ...args[0] }; + return transformOriginal(filename, code, options); +} describe('simple', () => { const code = 'export class A {}'; diff --git a/tasks/coverage/src/tools/semantic.rs b/tasks/coverage/src/tools/semantic.rs index 8ad756ef49305..c83682da03dd6 100644 --- a/tasks/coverage/src/tools/semantic.rs +++ b/tasks/coverage/src/tools/semantic.rs @@ -16,6 +16,7 @@ use crate::{ fn get_default_transformer_options() -> TransformOptions { TransformOptions { + debug: true, jsx: JsxOptions { jsx_plugin: true, jsx_self_plugin: true, diff --git a/tasks/transform_conformance/src/test_case.rs b/tasks/transform_conformance/src/test_case.rs index d4bad7884d0fe..4abedf09af465 100644 --- a/tasks/transform_conformance/src/test_case.rs +++ b/tasks/transform_conformance/src/test_case.rs @@ -52,7 +52,10 @@ impl TestCase { let mut options = BabelOptions::from_test_path(options_directory_path.as_path()); options.cwd.replace(cwd.to_path_buf()); - let transform_options = TransformOptions::try_from(&options); + let transform_options = TransformOptions::try_from(&options).map(|mut options| { + options.debug = true; + options + }); let path = path.to_path_buf(); let 
errors = vec![]; From bec06c03915cc39172552d98b335e6d439452454 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 10:41:08 +0100 Subject: [PATCH 2/8] Separate `create_str` method --- crates/oxc_traverse/src/context/uid.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index d6cb7ac199b7c..cf6e060580533 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -197,6 +197,13 @@ impl<'a> FastUidGenerator<'a> { /// This method will never return the same UID twice. #[inline] // `#[inline]` to inline into `TraverseCtx::generate_uid_name` pub(super) fn create(&mut self) -> Atom<'a> { + let allocator = self.allocator; + let uid = self.create_str(); + Atom::from(allocator.alloc_str(uid)) + } + + /// Create UID as `&str`. + fn create_str(&mut self) -> &str { // SAFETY: `last_letter_ptr` points to last byte of the buffer. // All bytes of the buffer are initialized. No other references to buffer exist. let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; @@ -218,7 +225,7 @@ impl<'a> FastUidGenerator<'a> { /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 52 UIDs. #[cold] #[inline(never)] - fn rollover(&mut self) -> Atom<'a> { + fn rollover(&mut self) -> &str { self.rollover_update(); self.get_active() } @@ -288,25 +295,24 @@ impl<'a> FastUidGenerator<'a> { self.active_ptr = unsafe { self.active_ptr.sub(1) }; } - /// Get the active string (current UID) and allocate into arena. Return UID as an [`Atom`]. + /// Get the active string (UID). // // `#[inline(always)]` to inline into `create`, to keep the path for no rollover as fast as possible #[expect(clippy::inline_always)] #[inline(always)] - fn get_active(&self) -> Atom<'a> { + fn get_active(&self) -> &str { // SAFETY: `active_ptr` points within buffer. `last_letter_ptr + 1` is end of buffer. 
// The distance between the two is at least 2 bytes. // All bytes in buffer are initialized. // Buffer contains only ASCII bytes, so any slice of it is a valid UTF-8 string. - let uid = unsafe { + unsafe { let end_ptr = self.last_letter_ptr.add(1).cast_const(); assert_unchecked!(end_ptr > self.active_ptr); #[expect(clippy::cast_sign_loss)] let len = end_ptr.offset_from(self.active_ptr) as usize; let slice = slice::from_raw_parts(self.active_ptr, len); str::from_utf8_unchecked(slice) - }; - Atom::from(self.allocator.alloc_str(uid)) + } } } From f98a5992cdc6de28356e08a2982947428d0f7d17 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 15:23:48 +0100 Subject: [PATCH 3/8] Revert "Separate `create_str` method" This reverts commit 4e463f49fa6a2ce2513f04cc3d765314fc9c5ce7. --- crates/oxc_traverse/src/context/uid.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index cf6e060580533..d6cb7ac199b7c 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -197,13 +197,6 @@ impl<'a> FastUidGenerator<'a> { /// This method will never return the same UID twice. #[inline] // `#[inline]` to inline into `TraverseCtx::generate_uid_name` pub(super) fn create(&mut self) -> Atom<'a> { - let allocator = self.allocator; - let uid = self.create_str(); - Atom::from(allocator.alloc_str(uid)) - } - - /// Create UID as `&str`. - fn create_str(&mut self) -> &str { // SAFETY: `last_letter_ptr` points to last byte of the buffer. // All bytes of the buffer are initialized. No other references to buffer exist. let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; @@ -225,7 +218,7 @@ impl<'a> FastUidGenerator<'a> { /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 52 UIDs. 
#[cold] #[inline(never)] - fn rollover(&mut self) -> &str { + fn rollover(&mut self) -> Atom<'a> { self.rollover_update(); self.get_active() } @@ -295,24 +288,25 @@ impl<'a> FastUidGenerator<'a> { self.active_ptr = unsafe { self.active_ptr.sub(1) }; } - /// Get the active string (UID). + /// Get the active string (current UID) and allocate into arena. Return UID as an [`Atom`]. // // `#[inline(always)]` to inline into `create`, to keep the path for no rollover as fast as possible #[expect(clippy::inline_always)] #[inline(always)] - fn get_active(&self) -> &str { + fn get_active(&self) -> Atom<'a> { // SAFETY: `active_ptr` points within buffer. `last_letter_ptr + 1` is end of buffer. // The distance between the two is at least 2 bytes. // All bytes in buffer are initialized. // Buffer contains only ASCII bytes, so any slice of it is a valid UTF-8 string. - unsafe { + let uid = unsafe { let end_ptr = self.last_letter_ptr.add(1).cast_const(); assert_unchecked!(end_ptr > self.active_ptr); #[expect(clippy::cast_sign_loss)] let len = end_ptr.offset_from(self.active_ptr) as usize; let slice = slice::from_raw_parts(self.active_ptr, len); str::from_utf8_unchecked(slice) - } + }; + Atom::from(self.allocator.alloc_str(uid)) } } From fd37a53e458bedeac3329b3e269621224987480b Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 22:12:22 +0100 Subject: [PATCH 4/8] Remove `first_letter_ptr` --- crates/oxc_traverse/src/context/uid.rs | 41 ++++++++------------------ 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index d6cb7ac199b7c..489de3b4a7fd1 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -90,7 +90,6 @@ impl<'a> UidGenerator<'a> { /// $$$$abc /// ^ `buffer_start_ptr` /// ^ `active_ptr` -/// ^ `first_letter_ptr` /// ^ `last_letter_ptr` /// ``` /// @@ -120,8 +119,6 @@ pub struct FastUidGenerator<'a> { buffer_start_ptr: 
*mut u8, /// Pointer to start of active string in buffer active_ptr: *const u8, - /// Pointer to first letter in buffer (excluding preceding `$`s) - first_letter_ptr: *const u8, /// Pointer to last letter in buffer last_letter_ptr: *mut u8, /// Allocator @@ -180,13 +177,7 @@ impl<'a> FastUidGenerator<'a> { // so `last_letter_ptr - dollar_count` cannot be out of bounds of `buffer`. let active_ptr = unsafe { last_letter_ptr.sub(dollar_count) }; - Self { - buffer_start_ptr, - active_ptr, - first_letter_ptr: last_letter_ptr, - last_letter_ptr, - allocator, - } + Self { buffer_start_ptr, active_ptr, last_letter_ptr, allocator } } /// Create a unique identifier. @@ -226,8 +217,9 @@ impl<'a> FastUidGenerator<'a> { fn rollover_update(&mut self) { let mut letter_ptr = self.last_letter_ptr; - // SAFETY: `letter_ptr` starts pointing to last byte of buffer, and loop exits if it gets to - // `first_letter_ptr`, which also points to within buffer. So `letter_ptr` remains in bounds. + // SAFETY: `letter_ptr` starts pointing to last byte of buffer, and is decremented. + // Loop exits if it gets to `$`. There's always at least one `$` at start of buffer, + // so the loop can't run beyond the start. // All bytes in buffer are initialized, so reading any byte is valid. 
unsafe { loop { @@ -235,16 +227,16 @@ impl<'a> FastUidGenerator<'a> { let letter = letter_ptr.as_mut().unwrap_unchecked(); *letter = b'a'; - // If this is first letter, we need to add an extra letter - if letter_ptr.cast_const() == self.first_letter_ptr { - break; - } - // Move back to previous letter letter_ptr = letter_ptr.sub(1); + let letter = letter_ptr.as_mut().unwrap_unchecked(); + + // If we've reached `$`, need to extend active string + if *letter == b'$' { + break; + } // Increment letter - let letter = letter_ptr.as_mut().unwrap_unchecked(); if (*letter | 32) < b'z' { // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` *letter += 1; @@ -260,13 +252,7 @@ impl<'a> FastUidGenerator<'a> { } } - // SAFETY: Loop above exited with `letter_ptr == first_letter_ptr`. - // There is always at least 1 `$` before 1st letter, so subtracting 1 is in bounds of buffer. - letter_ptr = unsafe { letter_ptr.sub(1) }; - // SAFETY: All bytes of buffer are initialized - let letter = unsafe { letter_ptr.as_mut().unwrap_unchecked() }; - debug_assert_eq!(*letter, b'$'); - + // Extend active string. // We can only create a maximum of `POSTFIX_BYTES` letters. // SAFETY: Buffer is originally created with length at least `POSTFIX_BYTES + 1`. // `last_letter_ptr` points to the last byte so subtracting `POSTFIX_BYTES - 1` is in bounds. @@ -275,11 +261,10 @@ impl<'a> FastUidGenerator<'a> { // Add another `a` on start (loop above has already converted all existing letters to `a`). // So we started with `$ZZ` and now end up with `$aaa`. + // SAFETY: `letter_ptr` is in bounds of buffer. All bytes of buffer are initialized. + let letter = unsafe { letter_ptr.as_mut().unwrap_unchecked() }; *letter = b'a'; - // Update pointer to first letter - self.first_letter_ptr = letter_ptr; - // Extend active string forwards by 1 byte. // SAFETY: Buffer is created with length `POSTFIX_BYTES + dollar_count`. // `active_ptr` is `dollar_count` less than the first letter. 
From ebc27308feb926d6cc296b446317fe2bba0fccb2 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 22:34:17 +0100 Subject: [PATCH 5/8] Use less letters --- crates/oxc_traverse/src/context/uid.rs | 107 +++++++------------------ 1 file changed, 28 insertions(+), 79 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index 489de3b4a7fd1..bd899a9ef3ac7 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -8,11 +8,11 @@ use oxc_data_structures::assert_unchecked; use oxc_semantic::Scoping; use oxc_span::Atom; -/// Number of characters in range `a-z` or `A-Z` required to produce at least `u32::MAX` unique combinations -const POSTFIX_BYTES: usize = 6; +/// Number of characters in range `a-z` required to produce at least `u32::MAX` unique combinations +const POSTFIX_BYTES: usize = 7; const _: () = { #[expect(clippy::cast_possible_truncation)] - let max_combinations = 52u64.pow(POSTFIX_BYTES as u32); + let max_combinations = 26u64.pow(POSTFIX_BYTES as u32); assert!(max_combinations >= u32::MAX as u64); }; @@ -65,7 +65,7 @@ impl<'a> UidGenerator<'a> { /// [`FastUidGenerator::create`] uses that information to generate a unique identifier which does not /// clash with any existing name. /// -/// Generated UIDs are `$a`, `$b`, ... `$z`, `$A`, `$B`, ... `$Z`, `$aa`, `$ab`, ... +/// Generated UIDs are `$a`, `$b`, ... `$z`, `$aa`, `$ab`, ... /// /// If AST already contains a symbol that begins with `$`, generated UIDs are `$$a`, `$$b`, etc. /// If AST contains a symbol with a longer `$` prefix, generated UIDs are prefixed with 1 more `$` @@ -80,30 +80,30 @@ impl<'a> UidGenerator<'a> { /// /// `FastUidGenerator` owns a small string buffer. /// -/// Buffer starts as "$$$$$$`". +/// Buffer starts as "$$$$$$$`". /// When generating a UID, the last byte is incremented. -/// i.e. "$$$$$$`" -> `$$$$$$a` -> `$$$$$$b` -> `$$$$$$c`. +/// i.e. 
"$$$$$$$`" -> `$$$$$$$a` -> `$$$$$$$b` -> `$$$$$$$c`. /// /// All the pointers stored in the type point to different places in that buffer: /// /// ```no_compile -/// $$$$abc -/// ^ `buffer_start_ptr` -/// ^ `active_ptr` -/// ^ `last_letter_ptr` +/// $$$$$abc +/// ^ `buffer_start_ptr` +/// ^ `active_ptr` +/// ^ `last_letter_ptr` /// ``` /// /// "Active" part of the buffer is the section which is used as UID: /// ```no_compile -/// Buffer: $$$$$$a -/// Active: ^^ +/// Buffer: $$$$$$$a +/// Active: ^^ /// ``` /// -/// 52nd UID is `$Z`, after which the UID grows in length to `$aa` ("rollover"). +/// 26th UID is `$z`, after which the UID grows in length to `$aa` ("rollover"). /// The active part of the buffer expands in place: /// ```no_compile -/// Buffer: $$$$$aa -/// Active: ^^^ +/// Buffer: $$$$$$aa +/// Active: ^^^ /// ``` /// /// This in place expansion means the buffer never has to reallocate. @@ -112,7 +112,7 @@ impl<'a> UidGenerator<'a> { /// is more efficient than a `u32` counter which is converted to a string on each call to /// [`FastUidGenerator::create`]. /// -/// Using pointers to access the buffer makes the fast path for generating a UID (last byte is not `Z`, +/// Using pointers to access the buffer makes the fast path for generating a UID (last byte is not `z`, /// so no "rollover" required) as cheap as possible - only a handful of instructions. pub struct FastUidGenerator<'a> { /// Pointer to start of buffer @@ -149,11 +149,11 @@ impl<'a> FastUidGenerator<'a> { // Create a buffer large enough to contain all possible UID names. // Fill it with `$`s and a final "`". // If `dollar_count` is 1 (no symbols found starting with a `$`), - // buffer contains "$$$$$$`" (7 bytes). + // buffer contains "$$$$$$$`" (8 bytes). // If the maximum number of UIDs are created, buffer will end up containing - // `$ZZZZZZ` (also 7 bytes). + // `$zzzzzzz` (also 8 bytes). // If an existing symbol was found which starts with `$$`, buffer needs to be longer. 
- // Buffer will contain "$$$$$$$$`" (9 bytes). Maximum UID is `$$$ZZZZZZ` (also 9 bytes). + // Buffer will contain "$$$$$$$$$`" (10 bytes). Maximum UID is `$$$zzzzzzz` (also 10 bytes). let len = dollar_count + POSTFIX_BYTES; let mut buffer = String::with_capacity(len); buffer.extend(iter::repeat_n('$', len - 1)); @@ -191,22 +191,19 @@ impl<'a> FastUidGenerator<'a> { // SAFETY: `last_letter_ptr` points to last byte of the buffer. // All bytes of the buffer are initialized. No other references to buffer exist. let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; - if (*last_letter | 32) < b'z' { - // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` or "`" - *last_letter += 1; - } else if *last_letter == b'z' { - *last_letter = b'A'; - } else { - debug_assert_eq!(*last_letter, b'Z'); + if *last_letter == b'z' { return self.rollover(); } + // Increment last letter i.e. `a` -> `b` + *last_letter += 1; + self.get_active() } - /// Create UID when last letter is `Z`, so the previous letter needs to be incremented. + /// Create UID when last letter is `z`, so the previous letter needs to be incremented. /// - /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 52 UIDs. + /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 26 UIDs. #[cold] #[inline(never)] fn rollover(&mut self) -> Atom<'a> { @@ -237,18 +234,12 @@ impl<'a> FastUidGenerator<'a> { } // Increment letter - if (*letter | 32) < b'z' { - // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` + if *letter != b'z' { *letter += 1; return; } - if *letter == b'z' { - *letter = b'A'; - return; - } - // Letter is `Z`. Need to change it to `a` and increment previous letter - debug_assert_eq!(*letter, b'Z'); + // Letter is `z`. 
Need to change it to `a` and increment previous letter } } @@ -260,7 +251,7 @@ impl<'a> FastUidGenerator<'a> { assert!(letter_ptr.cast_const() >= earliest_letter_ptr, "Created too many UIDs"); // Add another `a` on start (loop above has already converted all existing letters to `a`). - // So we started with `$ZZ` and now end up with `$aaa`. + // So we started with `$zz` and now end up with `$aaa`. // SAFETY: `letter_ptr` is in bounds of buffer. All bytes of buffer are initialized. let letter = unsafe { letter_ptr.as_mut().unwrap_unchecked() }; *letter = b'a'; @@ -586,16 +577,10 @@ fn fast_uids() { &[ "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", - "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", - "$N", "$O", "$P", "$Q", "$R", "$S", "$T", "$U", "$V", "$W", "$X", "$Y", "$Z", "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", - "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", - "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", "$bn", "$bo", "$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", - "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", - "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", "$ca", ], ), @@ -604,16 +589,10 @@ fn fast_uids() { &[ "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", - "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", - "$N", "$O", "$P", "$Q", "$R", "$S", "$T", 
"$U", "$V", "$W", "$X", "$Y", "$Z", "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", - "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", - "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", "$bn", "$bo", "$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", - "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", - "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", "$ca", ], ), @@ -622,16 +601,10 @@ fn fast_uids() { &[ "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", - "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", - "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", "$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", - "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", - "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", - "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", - "$$bN", "$$bO", "$$bP", "$$bQ", "$$bR", "$$bS", "$$bT", "$$bU", 
"$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", "$$ca", ], ), @@ -640,16 +613,10 @@ fn fast_uids() { &[ "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", - "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", - "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", "$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", - "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", - "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", - "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", - "$$bN", "$$bO", "$$bP", "$$bQ", "$$bR", "$$bS", "$$bT", "$$bU", "$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", "$$ca", ], ), @@ -658,16 +625,10 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", - "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", - "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", "$$$$an", "$$$$ao", "$$$$ap", 
"$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", - "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", - "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", - "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", - "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), @@ -676,16 +637,10 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", - "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", - "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", - "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", - "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", 
"$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", - "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", - "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), @@ -694,16 +649,10 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", - "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", - "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", - "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", - "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", - "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", - "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", 
"$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), From a1689eb70f93eec6638aa7cdf1e89216893f6a79 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 22:47:16 +0100 Subject: [PATCH 6/8] Revert "Use less letters" This reverts commit ebc27308feb926d6cc296b446317fe2bba0fccb2. --- crates/oxc_traverse/src/context/uid.rs | 107 ++++++++++++++++++------- 1 file changed, 79 insertions(+), 28 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index bd899a9ef3ac7..489de3b4a7fd1 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -8,11 +8,11 @@ use oxc_data_structures::assert_unchecked; use oxc_semantic::Scoping; use oxc_span::Atom; -/// Number of characters in range `a-z` required to produce at least `u32::MAX` unique combinations -const POSTFIX_BYTES: usize = 7; +/// Number of characters in range `a-z` or `A-Z` required to produce at least `u32::MAX` unique combinations +const POSTFIX_BYTES: usize = 6; const _: () = { #[expect(clippy::cast_possible_truncation)] - let max_combinations = 26u64.pow(POSTFIX_BYTES as u32); + let max_combinations = 52u64.pow(POSTFIX_BYTES as u32); assert!(max_combinations >= u32::MAX as u64); }; @@ -65,7 +65,7 @@ impl<'a> UidGenerator<'a> { /// [`FastUidGenerator::create`] uses that information to generate a unique identifier which does not /// clash with any existing name. /// -/// Generated UIDs are `$a`, `$b`, ... `$z`, `$aa`, `$ab`, ... +/// Generated UIDs are `$a`, `$b`, ... `$z`, `$A`, `$B`, ... `$Z`, `$aa`, `$ab`, ... /// /// If AST already contains a symbol that begins with `$`, generated UIDs are `$$a`, `$$b`, etc. /// If AST contains a symbol with a longer `$` prefix, generated UIDs are prefixed with 1 more `$` @@ -80,30 +80,30 @@ impl<'a> UidGenerator<'a> { /// /// `FastUidGenerator` owns a small string buffer. /// -/// Buffer starts as "$$$$$$$`". +/// Buffer starts as "$$$$$$`". 
/// When generating a UID, the last byte is incremented. -/// i.e. "$$$$$$$`" -> `$$$$$$$a` -> `$$$$$$$b` -> `$$$$$$$c`. +/// i.e. "$$$$$$`" -> `$$$$$$a` -> `$$$$$$b` -> `$$$$$$c`. /// /// All the pointers stored in the type point to different places in that buffer: /// /// ```no_compile -/// $$$$$abc -/// ^ `buffer_start_ptr` -/// ^ `active_ptr` -/// ^ `last_letter_ptr` +/// $$$$abc +/// ^ `buffer_start_ptr` +/// ^ `active_ptr` +/// ^ `last_letter_ptr` /// ``` /// /// "Active" part of the buffer is the section which is used as UID: /// ```no_compile -/// Buffer: $$$$$$$a -/// Active: ^^ +/// Buffer: $$$$$$a +/// Active: ^^ /// ``` /// -/// 26th UID is `$z`, after which the UID grows in length to `$aa` ("rollover"). +/// 52nd UID is `$Z`, after which the UID grows in length to `$aa` ("rollover"). /// The active part of the buffer expands in place: /// ```no_compile -/// Buffer: $$$$$$aa -/// Active: ^^^ +/// Buffer: $$$$$aa +/// Active: ^^^ /// ``` /// /// This in place expansion means the buffer never has to reallocate. @@ -112,7 +112,7 @@ impl<'a> UidGenerator<'a> { /// is more efficient than a `u32` counter which is converted to a string on each call to /// [`FastUidGenerator::create`]. /// -/// Using pointers to access the buffer makes the fast path for generating a UID (last byte is not `z`, +/// Using pointers to access the buffer makes the fast path for generating a UID (last byte is not `Z`, /// so no "rollover" required) as cheap as possible - only a handful of instructions. pub struct FastUidGenerator<'a> { /// Pointer to start of buffer @@ -149,11 +149,11 @@ impl<'a> FastUidGenerator<'a> { // Create a buffer large enough to contain all possible UID names. // Fill it with `$`s and a final "`". // If `dollar_count` is 1 (no symbols found starting with a `$`), - // buffer contains "$$$$$$$`" (8 bytes). + // buffer contains "$$$$$$`" (7 bytes). // If the maximum number of UIDs are created, buffer will end up containing - // `$zzzzzzz` (also 8 bytes). 
+ // `$ZZZZZZ` (also 7 bytes). // If an existing symbol was found which starts with `$$`, buffer needs to be longer. - // Buffer will contain "$$$$$$$$$`" (10 bytes). Maximum UID is `$$$zzzzzzz` (also 10 bytes). + // Buffer will contain "$$$$$$$$`" (9 bytes). Maximum UID is `$$$ZZZZZZ` (also 9 bytes). let len = dollar_count + POSTFIX_BYTES; let mut buffer = String::with_capacity(len); buffer.extend(iter::repeat_n('$', len - 1)); @@ -191,19 +191,22 @@ impl<'a> FastUidGenerator<'a> { // SAFETY: `last_letter_ptr` points to last byte of the buffer. // All bytes of the buffer are initialized. No other references to buffer exist. let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; - if *last_letter == b'z' { + if (*last_letter | 32) < b'z' { + // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` or "`" + *last_letter += 1; + } else if *last_letter == b'z' { + *last_letter = b'A'; + } else { + debug_assert_eq!(*last_letter, b'Z'); return self.rollover(); } - // Increment last letter i.e. `a` -> `b` - *last_letter += 1; - self.get_active() } - /// Create UID when last letter is `z`, so the previous letter needs to be incremented. + /// Create UID when last letter is `Z`, so the previous letter needs to be incremented. /// - /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 26 UIDs. + /// Marked `#[cold]` and `#[inline(never)]` as will only happen once every 52 UIDs. #[cold] #[inline(never)] fn rollover(&mut self) -> Atom<'a> { @@ -234,12 +237,18 @@ impl<'a> FastUidGenerator<'a> { } // Increment letter - if *letter != b'z' { + if (*letter | 32) < b'z' { + // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` *letter += 1; return; } + if *letter == b'z' { + *letter = b'A'; + return; + } - // Letter is `z`. Need to change it to `a` and increment previous letter + // Letter is `Z`. 
Need to change it to `a` and increment previous letter + debug_assert_eq!(*letter, b'Z'); } } @@ -251,7 +260,7 @@ impl<'a> FastUidGenerator<'a> { assert!(letter_ptr.cast_const() >= earliest_letter_ptr, "Created too many UIDs"); // Add another `a` on start (loop above has already converted all existing letters to `a`). - // So we started with `$zz` and now end up with `$aaa`. + // So we started with `$ZZ` and now end up with `$aaa`. // SAFETY: `letter_ptr` is in bounds of buffer. All bytes of buffer are initialized. let letter = unsafe { letter_ptr.as_mut().unwrap_unchecked() }; *letter = b'a'; @@ -577,10 +586,16 @@ fn fast_uids() { &[ "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", + "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", + "$N", "$O", "$P", "$Q", "$R", "$S", "$T", "$U", "$V", "$W", "$X", "$Y", "$Z", "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", + "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", + "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", "$bn", "$bo", "$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", + "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", + "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", "$ca", ], ), @@ -589,10 +604,16 @@ fn fast_uids() { &[ "$a", "$b", "$c", "$d", "$e", "$f", "$g", "$h", "$i", "$j", "$k", "$l", "$m", "$n", "$o", "$p", "$q", "$r", "$s", "$t", "$u", "$v", "$w", "$x", "$y", "$z", + "$A", "$B", "$C", "$D", "$E", "$F", "$G", "$H", "$I", "$J", "$K", "$L", "$M", + "$N", "$O", 
"$P", "$Q", "$R", "$S", "$T", "$U", "$V", "$W", "$X", "$Y", "$Z", "$aa", "$ab", "$ac", "$ad", "$ae", "$af", "$ag", "$ah", "$ai", "$aj", "$ak", "$al", "$am", "$an", "$ao", "$ap", "$aq", "$ar", "$as", "$at", "$au", "$av", "$aw", "$ax", "$ay", "$az", + "$aA", "$aB", "$aC", "$aD", "$aE", "$aF", "$aG", "$aH", "$aI", "$aJ", "$aK", "$aL", "$aM", + "$aN", "$aO", "$aP", "$aQ", "$aR", "$aS", "$aT", "$aU", "$aV", "$aW", "$aX", "$aY", "$aZ", "$ba", "$bb", "$bc", "$bd", "$be", "$bf", "$bg", "$bh", "$bi", "$bj", "$bk", "$bl", "$bm", "$bn", "$bo", "$bp", "$bq", "$br", "$bs", "$bt", "$bu", "$bv", "$bw", "$bx", "$by", "$bz", + "$bA", "$bB", "$bC", "$bD", "$bE", "$bF", "$bG", "$bH", "$bI", "$bJ", "$bK", "$bL", "$bM", + "$bN", "$bO", "$bP", "$bQ", "$bR", "$bS", "$bT", "$bU", "$bV", "$bW", "$bX", "$bY", "$bZ", "$ca", ], ), @@ -601,10 +622,16 @@ fn fast_uids() { &[ "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", + "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", + "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", "$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", + "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", + "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", + "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", + "$$bN", "$$bO", "$$bP", "$$bQ", 
"$$bR", "$$bS", "$$bT", "$$bU", "$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", "$$ca", ], ), @@ -613,10 +640,16 @@ fn fast_uids() { &[ "$$a", "$$b", "$$c", "$$d", "$$e", "$$f", "$$g", "$$h", "$$i", "$$j", "$$k", "$$l", "$$m", "$$n", "$$o", "$$p", "$$q", "$$r", "$$s", "$$t", "$$u", "$$v", "$$w", "$$x", "$$y", "$$z", + "$$A", "$$B", "$$C", "$$D", "$$E", "$$F", "$$G", "$$H", "$$I", "$$J", "$$K", "$$L", "$$M", + "$$N", "$$O", "$$P", "$$Q", "$$R", "$$S", "$$T", "$$U", "$$V", "$$W", "$$X", "$$Y", "$$Z", "$$aa", "$$ab", "$$ac", "$$ad", "$$ae", "$$af", "$$ag", "$$ah", "$$ai", "$$aj", "$$ak", "$$al", "$$am", "$$an", "$$ao", "$$ap", "$$aq", "$$ar", "$$as", "$$at", "$$au", "$$av", "$$aw", "$$ax", "$$ay", "$$az", + "$$aA", "$$aB", "$$aC", "$$aD", "$$aE", "$$aF", "$$aG", "$$aH", "$$aI", "$$aJ", "$$aK", "$$aL", "$$aM", + "$$aN", "$$aO", "$$aP", "$$aQ", "$$aR", "$$aS", "$$aT", "$$aU", "$$aV", "$$aW", "$$aX", "$$aY", "$$aZ", "$$ba", "$$bb", "$$bc", "$$bd", "$$be", "$$bf", "$$bg", "$$bh", "$$bi", "$$bj", "$$bk", "$$bl", "$$bm", "$$bn", "$$bo", "$$bp", "$$bq", "$$br", "$$bs", "$$bt", "$$bu", "$$bv", "$$bw", "$$bx", "$$by", "$$bz", + "$$bA", "$$bB", "$$bC", "$$bD", "$$bE", "$$bF", "$$bG", "$$bH", "$$bI", "$$bJ", "$$bK", "$$bL", "$$bM", + "$$bN", "$$bO", "$$bP", "$$bQ", "$$bR", "$$bS", "$$bT", "$$bU", "$$bV", "$$bW", "$$bX", "$$bY", "$$bZ", "$$ca", ], ), @@ -625,10 +658,16 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", 
"$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), @@ -637,10 +676,16 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", 
"$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", "$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), @@ -649,10 +694,16 @@ fn fast_uids() { &[ "$$$$a", "$$$$b", "$$$$c", "$$$$d", "$$$$e", "$$$$f", "$$$$g", "$$$$h", "$$$$i", "$$$$j", "$$$$k", "$$$$l", "$$$$m", "$$$$n", "$$$$o", "$$$$p", "$$$$q", "$$$$r", "$$$$s", "$$$$t", "$$$$u", "$$$$v", "$$$$w", "$$$$x", "$$$$y", "$$$$z", + "$$$$A", "$$$$B", "$$$$C", "$$$$D", "$$$$E", "$$$$F", "$$$$G", "$$$$H", "$$$$I", "$$$$J", "$$$$K", "$$$$L", "$$$$M", + "$$$$N", "$$$$O", "$$$$P", "$$$$Q", "$$$$R", "$$$$S", "$$$$T", "$$$$U", "$$$$V", "$$$$W", "$$$$X", "$$$$Y", "$$$$Z", "$$$$aa", "$$$$ab", "$$$$ac", "$$$$ad", "$$$$ae", "$$$$af", "$$$$ag", "$$$$ah", "$$$$ai", "$$$$aj", "$$$$ak", "$$$$al", "$$$$am", "$$$$an", "$$$$ao", "$$$$ap", "$$$$aq", "$$$$ar", "$$$$as", "$$$$at", "$$$$au", "$$$$av", "$$$$aw", "$$$$ax", "$$$$ay", "$$$$az", + "$$$$aA", "$$$$aB", "$$$$aC", "$$$$aD", "$$$$aE", "$$$$aF", "$$$$aG", "$$$$aH", "$$$$aI", "$$$$aJ", "$$$$aK", "$$$$aL", "$$$$aM", + "$$$$aN", "$$$$aO", "$$$$aP", "$$$$aQ", "$$$$aR", "$$$$aS", "$$$$aT", "$$$$aU", "$$$$aV", "$$$$aW", "$$$$aX", "$$$$aY", "$$$$aZ", "$$$$ba", "$$$$bb", "$$$$bc", "$$$$bd", "$$$$be", "$$$$bf", "$$$$bg", "$$$$bh", "$$$$bi", "$$$$bj", "$$$$bk", "$$$$bl", "$$$$bm", "$$$$bn", "$$$$bo", "$$$$bp", "$$$$bq", "$$$$br", "$$$$bs", "$$$$bt", "$$$$bu", "$$$$bv", "$$$$bw", "$$$$bx", "$$$$by", "$$$$bz", + "$$$$bA", "$$$$bB", "$$$$bC", "$$$$bD", "$$$$bE", "$$$$bF", "$$$$bG", "$$$$bH", "$$$$bI", "$$$$bJ", "$$$$bK", "$$$$bL", "$$$$bM", + "$$$$bN", "$$$$bO", "$$$$bP", "$$$$bQ", 
"$$$$bR", "$$$$bS", "$$$$bT", "$$$$bU", "$$$$bV", "$$$$bW", "$$$$bX", "$$$$bY", "$$$$bZ", "$$$$ca", ], ), From 59bc5d89336b74b1e333115ffb084afe95212c56 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 23:05:52 +0100 Subject: [PATCH 7/8] Increment letter without branch --- crates/oxc_traverse/src/context/uid.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index 489de3b4a7fd1..8c2d3cf111062 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -191,16 +191,15 @@ impl<'a> FastUidGenerator<'a> { // SAFETY: `last_letter_ptr` points to last byte of the buffer. // All bytes of the buffer are initialized. No other references to buffer exist. let last_letter = unsafe { self.last_letter_ptr.as_mut().unwrap_unchecked() }; - if (*last_letter | 32) < b'z' { - // `| 32` converts `A-Z` to lower case, so this matches `a-y` or `A-Y` or "`" - *last_letter += 1; - } else if *last_letter == b'z' { - *last_letter = b'A'; - } else { - debug_assert_eq!(*last_letter, b'Z'); + if *last_letter == b'Z' { return self.rollover(); } + // Increment letter, unless letter is `z` in which case jump to `A`. + // Performed with arithmetic to avoid a branch. 
https://godbolt.org/z/Kq81Kc4xq + *last_letter = (*last_letter) + .wrapping_add(1 + u8::from(*last_letter == b'z') * (b'A'.wrapping_sub(b'z') - 1)); + self.get_active() } From ea07034a80a456761a6ee64c6452bb68e208e360 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sat, 3 May 2025 23:13:21 +0100 Subject: [PATCH 8/8] Fix comment --- crates/oxc_traverse/src/context/uid.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/oxc_traverse/src/context/uid.rs b/crates/oxc_traverse/src/context/uid.rs index 8c2d3cf111062..3f35a9665fb8c 100644 --- a/crates/oxc_traverse/src/context/uid.rs +++ b/crates/oxc_traverse/src/context/uid.rs @@ -196,8 +196,8 @@ impl<'a> FastUidGenerator<'a> { } // Increment letter, unless letter is `z` in which case jump to `A`. - // Performed with arithmetic to avoid a branch. https://godbolt.org/z/Kq81Kc4xq - *last_letter = (*last_letter) + // Performed with arithmetic to avoid a branch. https://godbolt.org/z/Kxo9Wc98K + *last_letter = last_letter .wrapping_add(1 + u8::from(*last_letter == b'z') * (b'A'.wrapping_sub(b'z') - 1)); self.get_active()