diff --git a/Cargo.lock b/Cargo.lock index a6756661b6ce8..32e2183f08aee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1833,6 +1833,7 @@ dependencies = [ name = "oxc_mangler" version = "0.48.0" dependencies = [ + "fixedbitset", "itertools", "oxc_allocator", "oxc_ast", diff --git a/Cargo.toml b/Cargo.toml index ee8568d694859..974c802bf7723 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -149,6 +149,7 @@ encoding_rs = "0.8.35" encoding_rs_io = "0.1.7" env_logger = { version = "0.11.5", default-features = false } fast-glob = "0.4.0" +fixedbitset = "0.5.7" flate2 = "1.0.35" futures = "0.3.31" globset = "0.4.15" diff --git a/crates/oxc_mangler/Cargo.toml b/crates/oxc_mangler/Cargo.toml index f13471dc27ce2..dd3988f77123a 100644 --- a/crates/oxc_mangler/Cargo.toml +++ b/crates/oxc_mangler/Cargo.toml @@ -27,5 +27,6 @@ oxc_index = { workspace = true } oxc_semantic = { workspace = true } oxc_span = { workspace = true } +fixedbitset = { workspace = true } itertools = { workspace = true } rustc-hash = { workspace = true } diff --git a/crates/oxc_mangler/src/lib.rs b/crates/oxc_mangler/src/lib.rs index 0082d319c7c17..0dc5fab99de5c 100644 --- a/crates/oxc_mangler/src/lib.rs +++ b/crates/oxc_mangler/src/lib.rs @@ -1,12 +1,14 @@ +use std::iter; use std::ops::Deref; +use fixedbitset::FixedBitSet; use itertools::Itertools; use rustc_hash::FxHashSet; use oxc_allocator::{Allocator, Vec}; use oxc_ast::ast::{Declaration, Program, Statement}; use oxc_index::Idx; -use oxc_semantic::{ReferenceId, ScopeTree, SemanticBuilder, SymbolId, SymbolTable}; +use oxc_semantic::{ReferenceId, ScopeTree, Semantic, SemanticBuilder, SymbolId, SymbolTable}; use oxc_span::Atom; #[derive(Default, Debug, Clone, Copy)] @@ -22,17 +24,19 @@ type Slot = usize; /// See: /// * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/docs/architecture.md#symbol-minification) /// -/// This algorithm is targeted for better gzip compression. 
+/// This algorithm is based on the implementation of esbuild and additionally implements improved name reuse functionality. +/// It targets better gzip compression. /// -/// Visually, a slot is the index position for binding identifiers: +/// A slot is a placeholder for binding identifiers that share the same name. +/// Visually, it is the index position for binding identifiers: /// /// ```javascript -/// function slot0(slot2, slot3, slot4) { +/// function slot0(slot1, slot2, slot3) { /// slot2 = 1; /// } -/// function slot1(slot2, slot3) { -/// function slot4() { -/// slot2 = 1; +/// function slot1(slot0) { +/// function slot2() { +/// slot0 = 1; /// } /// } /// ``` @@ -40,24 +44,73 @@ type Slot = usize; /// The slot number for a new scope starts after the maximum slot of the parent scope. /// /// Occurrences of slots and their corresponding newly assigned short identifiers are: -/// - slot2: 4 - a -/// - slot3: 2 - b -/// - slot4: 2 - c -/// - slot0: 1 - d -/// - slot1: 1 - e +/// - slot2: 3 - a +/// - slot0: 2 - b +/// - slot1: 2 - c +/// - slot3: 1 - d /// /// After swapping out the mangled names: /// /// ```javascript -/// function d(a, b, c) { +/// function b(c, a, d) { /// a = 1; /// } -/// function e(a, b) { -/// function c() { -/// a = 1; +/// function c(b) { +/// function a() { +/// b = 1; /// } /// } /// ``` +/// +/// ## Name Reuse Calculation +/// +/// This improvement was inspired by [evanw/esbuild#2614](https://github.com/evanw/esbuild/pull/2614). +/// +/// For better compression, we shadow the variables where possible to reuse the same name. 
+/// For example, the following code: +/// ```javascript +/// var top_level_a = 0; +/// var top_level_b = 1; +/// function foo() { +/// var foo_a = 1; +/// console.log(top_level_b, foo_a); +/// } +/// function bar() { +/// var bar_a = 1; +/// console.log(top_level_b, bar_a); +/// } +/// console.log(top_level_a, foo(), bar()) +/// ``` +/// `top_level_a` is declared in the root scope, but is not used in function `foo` or function `bar`. +/// Therefore, we can reuse the same name for `top_level_a`, `foo_a` and `bar_a`. +/// +/// To calculate whether the variable name can be reused in the descendant scopes, +/// this mangler introduces the concepts of symbol liveness and slot liveness. +/// Symbol liveness is a subtree of the scope tree that contains the declared scope of the symbol and +/// all the scopes that the symbol is used in. It is a subtree, so any scopes that are between the declared scope and the used scope +/// are also included. This is to ensure that the symbol is not shadowed by a different symbol before the use in the descendant scope. +/// +/// For the example above, the liveness of each symbol is: +/// - `top_level_a`: root_scope +/// - `top_level_b`: root_scope -> foo, root_scope -> bar +/// - `foo_a`: foo +/// - `bar_a`: bar +/// - `foo`: root_scope +/// - `bar`: root_scope +/// +/// Slot liveness is the same as symbol liveness, but it is a subforest (multiple subtrees) of the scope tree that can contain +/// the liveness of multiple symbols. +/// +/// Now that we have the liveness of each symbol, we want to assign symbols to a minimal number of slots. +/// This is a graph coloring problem where the node of the graph is the symbol, the edge of the graph indicates whether +/// two symbols share a live scope, and the color of the node is the slot. +/// This mangler uses a greedy algorithm to assign symbols to slots to achieve that. 
+/// In other words, it assigns each symbol to the first slot whose liveness does not overlap the liveness of the symbol. +/// For the example above, each symbol is assigned to the following slots: +/// - slot 0: `top_level_a`, `foo_a`, `bar_a` +/// - slot 1: `top_level_b` +/// - slot 2: `foo` +/// - slot 3: `bar` #[derive(Default)] pub struct Mangler { symbol_table: SymbolTable, @@ -88,22 +141,20 @@ impl Mangler { #[must_use] pub fn build(self, program: &Program<'_>) -> Mangler { - let semantic = SemanticBuilder::new().build(program).semantic; - let (symbol_table, scope_tree) = semantic.into_symbol_table_and_scope_tree(); - self.build_with_symbols_and_scopes(symbol_table, &scope_tree, program) + let semantic = + SemanticBuilder::new().with_scope_tree_child_ids(true).build(program).semantic; + self.build_with_semantic(semantic, program) } + /// # Panics + /// + /// Panics if the child ids have not been generated for the scope tree. #[must_use] - pub fn build_with_symbols_and_scopes( - self, - symbol_table: SymbolTable, - scope_tree: &ScopeTree, - program: &Program<'_>, - ) -> Mangler { + pub fn build_with_semantic(self, semantic: Semantic<'_>, program: &Program<'_>) -> Mangler { if self.options.debug { - self.build_with_symbols_and_scopes_impl(symbol_table, scope_tree, program, debug_name) + self.build_with_symbols_and_scopes_impl(semantic, program, debug_name) } else { - self.build_with_symbols_and_scopes_impl(symbol_table, scope_tree, program, base54) + self.build_with_symbols_and_scopes_impl(semantic, program, base54) } } @@ -112,11 +163,14 @@ impl Mangler { G: Fn(usize) -> InlineString, >( mut self, - symbol_table: SymbolTable, - scope_tree: &ScopeTree, + semantic: Semantic<'_>, program: &Program<'_>, generate_name: G, ) -> Mangler { + let (mut symbol_table, scope_tree, ast_nodes) = semantic.into_symbols_scopes_nodes(); + + assert!(scope_tree.has_child_ids(), "child_id needs to be generated"); + let (exported_names, exported_symbols) = if self.options.top_level { 
Mangler::collect_exported_symbols(program) } else { @@ -125,59 +179,81 @@ impl Mangler { let allocator = Allocator::default(); - // Mangle the symbol table by computing slots from the scope tree. - // A slot is the occurrence index of a binding identifier inside a scope. - let mut symbol_table = symbol_table; - - // Total number of slots for all scopes - let mut total_number_of_slots: Slot = 0; - // All symbols with their assigned slots. Keyed by symbol id. let mut slots: Vec<'_, Slot> = Vec::with_capacity_in(symbol_table.len(), &allocator); for _ in 0..symbol_table.len() { slots.push(0); } - // Keep track of the maximum slot number for each scope - let mut max_slot_for_scope = Vec::with_capacity_in(scope_tree.len(), &allocator); - for _ in 0..scope_tree.len() { - max_slot_for_scope.push(0); - } + // Stores the ids of the scopes in which each slot is live. Keyed by slot number. + let mut slot_liveness: std::vec::Vec<FixedBitSet> = vec![]; - // Walk the scope tree and compute the slot number for each scope let mut tmp_bindings = std::vec::Vec::with_capacity(100); - for scope_id in scope_tree.descendants_from_root() { + let mut reusable_slots = std::vec::Vec::new(); + // Walk down the scope tree and assign a slot number for each symbol. + // It is possible to do this in a loop over the symbol list, + // but walking down the scope tree seems to generate better code. + for scope_id in iter::once(scope_tree.root_scope_id()) + .chain(scope_tree.iter_all_child_ids(scope_tree.root_scope_id())) + { let bindings = scope_tree.get_bindings(scope_id); - - // The current slot number is continued by the maximum slot from the parent scope - let parent_max_slot = scope_tree - .get_parent_id(scope_id) - .map_or(0, |parent_scope_id| max_slot_for_scope[parent_scope_id.index()]); - - let mut slot = parent_max_slot; - - if !bindings.is_empty() { - // Sort `bindings` in declaration order. 
- tmp_bindings.clear(); - tmp_bindings.extend(bindings.values().copied()); - tmp_bindings.sort_unstable(); - for symbol_id in &tmp_bindings { - slots[symbol_id.index()] = slot; - slot += 1; - } + if bindings.is_empty() { + continue; } - max_slot_for_scope[scope_id.index()] = slot; + let mut slot = slot_liveness.len(); + + reusable_slots.clear(); + reusable_slots.extend( + // Slots that are already assigned to other symbols, but do not live in the current scope. + slot_liveness + .iter() + .enumerate() + .filter(|(_, slot_liveness)| !slot_liveness.contains(scope_id.index())) + .map(|(slot, _)| slot) + .take(bindings.len()), + ); + + // The number of new slots that need to be allocated. + let remaining_count = bindings.len() - reusable_slots.len(); + reusable_slots.extend(slot..slot + remaining_count); + + slot += remaining_count; + if slot_liveness.len() < slot { + slot_liveness.resize_with(slot, || FixedBitSet::with_capacity(scope_tree.len())); + } - if slot > total_number_of_slots { - total_number_of_slots = slot; + // Sort `bindings` in declaration order. + tmp_bindings.clear(); + tmp_bindings.extend(bindings.values().copied()); + tmp_bindings.sort_unstable(); + for (symbol_id, assigned_slot) in + tmp_bindings.iter().zip(reusable_slots.iter().copied()) + { + slots[symbol_id.index()] = assigned_slot; + + // Calculate the scope ids that this symbol is alive in. + let lived_scope_ids = symbol_table + .get_resolved_references(*symbol_id) + .flat_map(|reference| { + let used_scope_id = ast_nodes.get_node(reference.node_id()).scope_id(); + scope_tree.ancestors(used_scope_id).take_while(|s_id| *s_id != scope_id) + }) + .chain(iter::once(scope_id)); + + // Since the slot is now assigned to this symbol, it is alive in all the scopes that this symbol is alive in. 
+ for scope_id in lived_scope_ids { + slot_liveness[assigned_slot].insert(scope_id.index()); + } } } + let total_number_of_slots = slot_liveness.len(); + let frequencies = self.tally_slot_frequencies( &symbol_table, &exported_symbols, - scope_tree, + &scope_tree, total_number_of_slots, &slots, &allocator, diff --git a/crates/oxc_minifier/src/lib.rs b/crates/oxc_minifier/src/lib.rs index 6904b981b53b0..ede3349e6f584 100644 --- a/crates/oxc_minifier/src/lib.rs +++ b/crates/oxc_minifier/src/lib.rs @@ -55,11 +55,12 @@ impl Minifier { Stats::default() }; let mangler = self.options.mangle.map(|options| { - let semantic = SemanticBuilder::new().with_stats(stats).build(program).semantic; - let (symbols, scopes) = semantic.into_symbol_table_and_scope_tree(); - Mangler::default() - .with_options(options) - .build_with_symbols_and_scopes(symbols, &scopes, program) + let semantic = SemanticBuilder::new() + .with_stats(stats) + .with_scope_tree_child_ids(true) + .build(program) + .semantic; + Mangler::default().with_options(options).build_with_semantic(semantic, program) }); MinifierReturn { mangler } } diff --git a/crates/oxc_minifier/tests/mangler/mod.rs b/crates/oxc_minifier/tests/mangler/mod.rs index 3d76bed459fca..8534638354ddb 100644 --- a/crates/oxc_minifier/tests/mangler/mod.rs +++ b/crates/oxc_minifier/tests/mangler/mod.rs @@ -25,6 +25,21 @@ fn mangler() { "var x; function foo(a) { ({ x } = y) }", "import { x } from 's'; export { x }", "function _ (exports) { Object.defineProperty(exports, '__esModule', { value: true }) }", + "function foo(foo_a, foo_b, foo_c) {}; function bar(bar_a, bar_b, bar_c) {}", // foo_a and bar_a can be reused + "function _() { function foo() { var x; foo; } }", // x should not use the same name with foo + "function _() { var x; function foo() { var y; function bar() { x } } }", // y should not shadow x + "function _() { function x(a) {} }", // a can shadow x + "function _() { function x(a) { x } }", // a should not shadow x + "function _() { 
var x; { var y }}", // y should not shadow x + "function _() { var x; { let y }}", // y can shadow x + "function _() { let x; { let y }}", // y can shadow x + "function _() { var x; { const y }}", // y can shadow x + "function _() { let x; { const y }}", // y can shadow x + "function _() { var x; { class Y{} }}", // Y can shadow x + "function _() { let x; { class Y{} }}", // Y can shadow x + "function _() { var x; try { throw 0 } catch (e) { e } }", // e can shadow x + "function _() { var x; try { throw 0 } catch (e) { var e } }", // e can shadow x (not implemented) + "function _() { var x; try { throw 0 } catch { var e } }", // e should not shadow x ]; let top_level_cases = [ "function foo(a) {a}", diff --git a/crates/oxc_minifier/tests/mangler/snapshots/mangler.snap b/crates/oxc_minifier/tests/mangler/snapshots/mangler.snap index 39756b92e6a0c..5056372bf82bf 100644 --- a/crates/oxc_minifier/tests/mangler/snapshots/mangler.snap +++ b/crates/oxc_minifier/tests/mangler/snapshots/mangler.snap @@ -2,23 +2,23 @@ source: crates/oxc_minifier/tests/mangler/mod.rs --- function foo(a) {a} -function foo(b) { - b; +function foo(a) { + a; } function foo(a) { let _ = { x } } -function foo(b) { - let c = { x }; +function foo(a) { + let b = { x }; } function foo(a) { let { x } = y } -function foo(b) { - let { x: c } = y; +function foo(a) { + let { x: b } = y; } var x; function foo(a) { ({ x } = y) } var x; -function foo(c) { +function foo(b) { ({x} = y); } @@ -31,9 +31,131 @@ function _(exports) { Object.defineProperty(exports, "__esModule", { value: true }); } +function foo(foo_a, foo_b, foo_c) {}; function bar(bar_a, bar_b, bar_c) {} +function foo(a, b, c) {} +; +function bar(a, b, c) {} + +function _() { function foo() { var x; foo; } } +function _() { + function a() { + var b; + a; + } +} + +function _() { var x; function foo() { var y; function bar() { x } } } +function _() { + var a; + function b() { + var b; + function c() { + a; + } + } +} + +function _() { function x(a) 
{} } +function _() { + function a(a) {} +} + +function _() { function x(a) { x } } +function _() { + function a(b) { + a; + } +} + +function _() { var x; { var y }} +function _() { + var a; + { + var b; + } +} + +function _() { var x; { let y }} +function _() { + var a; + { + let a; + } +} + +function _() { let x; { let y }} +function _() { + let a; + { + let a; + } +} + +function _() { var x; { const y }} +function _() { + var a; + { + const a; + } +} + +function _() { let x; { const y }} +function _() { + let a; + { + const a; + } +} + +function _() { var x; { class Y{} }} +function _() { + var a; + { + class a {} + } +} + +function _() { let x; { class Y{} }} +function _() { + let a; + { + class a {} + } +} + +function _() { var x; try { throw 0 } catch (e) { e } } +function _() { + var a; + try { + throw 0; + } catch (a) { + a; + } +} + +function _() { var x; try { throw 0 } catch (e) { var e } } +function _() { + var a; + try { + throw 0; + } catch (b) { + var b; + } +} + +function _() { var x; try { throw 0 } catch { var e } } +function _() { + var a; + try { + throw 0; + } catch { + var b; + } +} + function foo(a) {a} -function a(b) { - b; +function a(a) { + a; } export function foo() {}; foo() diff --git a/crates/oxc_semantic/src/lib.rs b/crates/oxc_semantic/src/lib.rs index 3bb10b9fe8652..175a191ce83f7 100644 --- a/crates/oxc_semantic/src/lib.rs +++ b/crates/oxc_semantic/src/lib.rs @@ -90,12 +90,16 @@ pub struct Semantic<'a> { } impl<'a> Semantic<'a> { - /// Extract the [`SymbolTable`] and [`ScopeTree`] from the [`Semantic`] - /// instance, consuming `self`. + /// Extract [`SymbolTable`] and [`ScopeTree`] from [`Semantic`]. pub fn into_symbol_table_and_scope_tree(self) -> (SymbolTable, ScopeTree) { (self.symbols, self.scopes) } + /// Extract [`SymbolTable`], [`ScopeTree`] and [`AstNodes`] from the [`Semantic`]. 
+ pub fn into_symbols_scopes_nodes(self) -> (SymbolTable, ScopeTree, AstNodes<'a>) { + (self.symbols, self.scopes, self.nodes) + } + /// Source code of the JavaScript/TypeScript program being analyzed. pub fn source_text(&self) -> &'a str { self.source_text diff --git a/napi/minify/test/minify.test.ts b/napi/minify/test/minify.test.ts index a9a88a1cb36ce..3841d0c5dba05 100644 --- a/napi/minify/test/minify.test.ts +++ b/napi/minify/test/minify.test.ts @@ -8,7 +8,7 @@ describe('simple', () => { it('matches output', () => { const ret = minify('test.js', code, { sourcemap: true }); expect(ret).toStrictEqual({ - 'code': 'function foo(){var b;b(void 0)}foo();', + 'code': 'function foo(){var a;a(void 0)}foo();', 'map': { 'mappings': 'AAAA,SAAS,KAAM,CAAE,IAAIA,EAAK,SAAc,AAAE,CAAC,KAAK', 'names': [ diff --git a/tasks/benchmark/benches/minifier.rs b/tasks/benchmark/benches/minifier.rs index 25c7b3180418a..b9bd1fb15b217 100644 --- a/tasks/benchmark/benches/minifier.rs +++ b/tasks/benchmark/benches/minifier.rs @@ -58,13 +58,10 @@ fn bench_mangler(criterion: &mut Criterion) { b.iter_with_setup_wrapper(|runner| { allocator.reset(); let program = Parser::new(&allocator, source_text, source_type).parse().program; - let (symbols, scopes) = SemanticBuilder::new() - .build(&program) - .semantic - .into_symbol_table_and_scope_tree(); + let semantic = + SemanticBuilder::new().with_scope_tree_child_ids(true).build(&program).semantic; runner.run(|| { - let _ = - Mangler::new().build_with_symbols_and_scopes(symbols, &scopes, &program); + let _ = Mangler::new().build_with_semantic(semantic, &program); }); }); }); diff --git a/tasks/minsize/minsize.snap b/tasks/minsize/minsize.snap index c49db0d51b92c..0513ae9b85a62 100644 --- a/tasks/minsize/minsize.snap +++ b/tasks/minsize/minsize.snap @@ -1,27 +1,27 @@ | Oxc | ESBuild | Oxc | ESBuild | Original | minified | minified | gzip | gzip | Fixture ------------------------------------------------------------------------------------- -72.14 kB | 
23.67 kB | 23.70 kB | 8.60 kB | 8.54 kB | react.development.js +72.14 kB | 23.61 kB | 23.70 kB | 8.55 kB | 8.54 kB | react.development.js -173.90 kB | 59.79 kB | 59.82 kB | 19.41 kB | 19.33 kB | moment.js +173.90 kB | 59.71 kB | 59.82 kB | 19.26 kB | 19.33 kB | moment.js -287.63 kB | 90.08 kB | 90.07 kB | 32.03 kB | 31.95 kB | jquery.js +287.63 kB | 89.58 kB | 90.07 kB | 31.08 kB | 31.95 kB | jquery.js -342.15 kB | 118.19 kB | 118.14 kB | 44.45 kB | 44.37 kB | vue.js +342.15 kB | 117.76 kB | 118.14 kB | 43.67 kB | 44.37 kB | vue.js -544.10 kB | 71.75 kB | 72.48 kB | 26.15 kB | 26.20 kB | lodash.js +544.10 kB | 71.50 kB | 72.48 kB | 25.92 kB | 26.20 kB | lodash.js -555.77 kB | 272.89 kB | 270.13 kB | 90.90 kB | 90.80 kB | d3.js +555.77 kB | 272.35 kB | 270.13 kB | 88.60 kB | 90.80 kB | d3.js -1.01 MB | 460.16 kB | 458.89 kB | 126.78 kB | 126.71 kB | bundle.min.js +1.01 MB | 458.28 kB | 458.89 kB | 123.94 kB | 126.71 kB | bundle.min.js -1.25 MB | 652.68 kB | 646.76 kB | 163.48 kB | 163.73 kB | three.js +1.25 MB | 650.82 kB | 646.76 kB | 161.51 kB | 163.73 kB | three.js -2.14 MB | 723.85 kB | 724.14 kB | 179.88 kB | 181.07 kB | victory.js +2.14 MB | 719.54 kB | 724.14 kB | 162.47 kB | 181.07 kB | victory.js -3.20 MB | 1.01 MB | 1.01 MB | 331.98 kB | 331.56 kB | echarts.js +3.20 MB | 1.01 MB | 1.01 MB | 325.40 kB | 331.56 kB | echarts.js -6.69 MB | 2.31 MB | 2.31 MB | 491.91 kB | 488.28 kB | antd.js +6.69 MB | 2.30 MB | 2.31 MB | 470.00 kB | 488.28 kB | antd.js -10.95 MB | 3.48 MB | 3.49 MB | 905.29 kB | 915.50 kB | typescript.js +10.95 MB | 3.37 MB | 3.49 MB | 866.68 kB | 915.50 kB | typescript.js