From a4ae4505f12522f01fb36193da613024cc6a96e4 Mon Sep 17 00:00:00 2001 From: sapphi-red <49056869+sapphi-red@users.noreply.github.com> Date: Fri, 17 Jan 2025 05:53:54 +0000 Subject: [PATCH] feat(minifier): fold array concat chaining (#8440) Compress `[].concat(a).concat(b)` into `[].concat(a, b)`. **References** - [Spec of `Array::concat`](https://tc39.es/ecma262/multipage/indexed-collections.html#sec-array.prototype.concat) --- ### The new assumption I added a new assumption description in `crates/oxc_minifier/README.md`: "Errors thrown when creating a String or an Array that exceeds the maximum length can disappear or moved". This is an assumption held by other minifiers. Without this assumption, we would have to treat `+` and array creation/update as having side effects, which limits minification. For input: ```js [...Array(Number(2n ** 32n - 1n)),""] " ".repeat(Number(2n ** 29n - 24n - 1n)) + ' ' export {} ``` (Note that `2 ** 32 - 1` is the max array length and `2 ** 29 - 24` is the max string length on V8.) This code throws errors for a too-long array and a too-long string on V8-based runtimes. terser outputs: ```js Array(Number(2n**32n-1n))," ".repeat(Number(536870887n));export{}; ``` No error will happen with this code. SWC outputs: ```js [...Array(Number(2n**32n-1n))]," ".repeat(Number(2n**29n-24n-1n));export{}; ``` No error will happen with this code. 
[playground](https://play.swc.rs/?version=1.10.7&code=H4sIAAAAAAAAA4vW09NzLCpKrNTwK81NSi3SMMpT0NJSMAZSugqGeZqaOkpKsbxcSgpKekWpBamJJajqjCxB6oxMYKoVtBXUFdR5uVIrCvKLShSqa3m5AGRqy%2FBiAAAA&config=H4sIAAAAAAAAA32UO3LjMAyG%2B5zCozrFjostcoDtcgYOTYIyvXxoCNCxJuO7L0RJtjeG3En48AMkAOL7bbfrTmi6j903f%2FLPoAtCuf2zBcdE%2BsKWDkzUaIofqHtf6Qkn5HRAaKbrTDrSpQdqKtz%2F2u8XRRdyRlgViy365N34mNPkOBRAfLCxlUPWCInwf%2F3CSv6aAJX6aD%2FkHECnF0RpVD4R9FCkwCaHoAcEddZFiDKdVBePWUoxwUpg1VDyIPJkPfmcOOcztaCtMtmCgHwBQ%2F4MkoxzsSwhX0%2B4T8MWDrXvW59%2FqOGsQ9Uk5IRLawmfVoh6zB5JuZqkEs5wowYzXIr7U%2BmdKkC1pGfdKfu00ZO%2FAFyBoBGTjiDFbR6O52lL7V4qfXI8sjQKnOdbumWCnouqvHdCZafKQCEvdbOArQamyhrpOAveKB96Cwqc41kRQuOXJ3OUktI4QHYC4P5qJ03VDNTtFW7w6UG8wH%2F4liQP2OIRNR23KY7xkMOLBBHomO0LB24F5W1ceEtchm1ekwUeDbCiS8UGnpcAPwDKKrR9%2BTQb%2FDw4oupDPtzXxOJwve3hqFN%2Ff%2B%2FzKn5bHLqYbW1wWfJTf%2BfV%2FLu7O61beD1B5%2FFzFbac13%2FKOhgeLwYAAA%3D%3D) esbuild outputs: ```js [...Array(Number(2n**32n-1n))]," ".repeat(Number(2n**29n-24n-1n))+"";export{}; ``` No error will happen with this code. [esbuild try](https://esbuild.github.io/try/#dAAwLjI0LjIALS1taW5pZnkAWy4uLkFycmF5KE51bWJlcigybiAqKiAzMm4gLSAxbikpLCIiXQoiICIucmVwZWF0KE51bWJlcigybiAqKiAyOW4gLSAyNG4gLSAxbikpICsgJyAnCmV4cG9ydCB7fQ) OXC outputs: ```js [...Array(Number(2n**32n-1n))]," ".repeat(Number(2n**29n-24n-1n))+" ";export{}; ``` The array error won't happen and the String error will happen with this code. 
[playground](https://playground.oxc.rs/#eNpVT71OwzAQfhXrlv4QIhpgIBtLR8SOGZxwCUH22To7baMo746dNEVMd5/u+7sRaijhI8/zV2Y1bN96UyFvCxL7vXiM414caLfLJEj4lCRBSMgZHarwn1u8JG7xtCrEndiIjSS8OMtBjJMkyMBCOQL3lIYfKKgLlIF7zEB3FKBslPYR+No6XC9+MJXVKwqsyDeWzZU8ZeAUe+TZ0vZc47HTSMpEAwjoQ/7jY7JjjKQTvitG8n/ilDuL437zXtyC4hZjKUBfPByeITJq+4UtpvoRmI66plu4tTUpIRovNc/fXcx2qr69YRS1+opmJwps9VHbc9KfkCvr43npNU2/v/WIsw==) --- crates/oxc_minifier/README.md | 2 + .../peephole_replace_known_methods.rs | 102 +++++++++++++++--- tasks/minsize/minsize.snap | 2 +- 3 files changed, 88 insertions(+), 18 deletions(-) diff --git a/crates/oxc_minifier/README.md b/crates/oxc_minifier/README.md index e88a31d7e5c3d..3ab1181bc393e 100644 --- a/crates/oxc_minifier/README.md +++ b/crates/oxc_minifier/README.md @@ -29,6 +29,8 @@ The compressor is responsible for rewriting statements and expressions for minim - Examples that breaks this assumption: `(() => { console.log(v); let v; })()` - `with` statement is not used - Examples that breaks this assumption: `with (Math) { console.log(PI); }` +- Errors thrown when creating a String or an Array that exceeds the maximum length can disappear or moved + - Examples that breaks this assumption: `try { new Array(Number(2n**53n)) } catch { console.log('log') }` ## Terser Tests diff --git a/crates/oxc_minifier/src/ast_passes/peephole_replace_known_methods.rs b/crates/oxc_minifier/src/ast_passes/peephole_replace_known_methods.rs index 4f9a1c29e2a98..c8349273c22a8 100644 --- a/crates/oxc_minifier/src/ast_passes/peephole_replace_known_methods.rs +++ b/crates/oxc_minifier/src/ast_passes/peephole_replace_known_methods.rs @@ -7,7 +7,7 @@ use oxc_ecmascript::{ constant_evaluation::ConstantEvaluation, StringCharAt, StringCharCodeAt, StringIndexOf, StringLastIndexOf, StringSubstring, ToInt32, }; -use oxc_traverse::{traverse_mut_with_ctx, ReusableTraverseCtx, Traverse, TraverseCtx}; +use oxc_traverse::{traverse_mut_with_ctx, Ancestor, ReusableTraverseCtx, Traverse, TraverseCtx}; use 
crate::{ctx::Ctx, CompressorPass}; @@ -26,6 +26,7 @@ impl<'a> CompressorPass<'a> for PeepholeReplaceKnownMethods { impl<'a> Traverse<'a> for PeepholeReplaceKnownMethods { fn exit_expression(&mut self, node: &mut Expression<'a>, ctx: &mut TraverseCtx<'a>) { + self.try_fold_array_concat(node, ctx); self.try_fold_known_string_methods(node, ctx); } } @@ -338,6 +339,83 @@ impl<'a> PeepholeReplaceKnownMethods { } result.into_iter().rev().collect() } + + /// `[].concat(a).concat(b)` -> `[].concat(a, b)` + fn try_fold_array_concat(&mut self, node: &mut Expression<'a>, ctx: &mut TraverseCtx<'a>) { + if matches!(ctx.parent(), Ancestor::StaticMemberExpressionObject(_)) { + return; + } + + let original_span = if let Expression::CallExpression(root_call_expr) = node { + root_call_expr.span + } else { + return; + }; + + let mut current_node: &mut Expression = node; + let mut collected_arguments = ctx.ast.vec(); + let new_root_callee: &mut Expression<'a>; + loop { + let Expression::CallExpression(ce) = current_node else { + return; + }; + let Expression::StaticMemberExpression(member) = &ce.callee else { + return; + }; + if member.optional || member.property.name != "concat" { + return; + } + + // We don't need to check if the arguments has a side effect here. + // + // The only side effect Array::concat can cause is throwing an error when the created array is too long. + // With the compressor assumption, that error can be moved. + // + // For example, if we have `[].concat(a).concat(b)`, the steps before the compression is: + // 1. evaluate `a` + // 2. `[].concat(a)` creates `[a]` + // 3. evaluate `b` + // 4. `.concat(b)` creates `[a, b]` + // + // The steps after the compression (`[].concat(a, b)`) is: + // 1. evaluate `a` + // 2. evaluate `b` + // 3. `[].concat(a, b)` creates `[a, b]` + // + // The error that has to be thrown in the second step before the compression will be thrown in the third step. + + let CallExpression { callee, arguments, .. 
} = ce.as_mut(); + collected_arguments.push(arguments); + + // [].concat() + let is_root_expr_concat = { + let Expression::StaticMemberExpression(member) = callee else { unreachable!() }; + matches!(&member.object, Expression::ArrayExpression(_)) + }; + if is_root_expr_concat { + new_root_callee = callee; + break; + } + + let Expression::StaticMemberExpression(member) = callee else { unreachable!() }; + current_node = &mut member.object; + } + + if collected_arguments.len() <= 1 { + return; + } + + *node = ctx.ast.expression_call( + original_span, + ctx.ast.move_expression(new_root_callee), + Option::::None, + ctx.ast.vec_from_iter( + collected_arguments.into_iter().rev().flat_map(|arg| ctx.ast.move_vec(arg)), + ), + false, + ); + self.changed = true; + } } /// Port from: @@ -1083,25 +1161,15 @@ mod test { } #[test] - #[ignore] fn test_fold_concat_chaining() { - // enableTypeCheck(); - fold("[1,2].concat(1).concat(2,['abc']).concat('abc')", "[1,2].concat(1,2,['abc'],'abc')"); - fold("[].concat(['abc']).concat(1).concat([2,3])", "['abc'].concat(1,[2,3])"); + fold("[].concat(['abc']).concat(1).concat([2,3])", "[].concat(['abc'],1,[2,3])"); - // cannot fold concat based on type information - fold_same("returnArrayType().concat(returnArrayType()).concat(1).concat(2)"); - fold_same("returnArrayType().concat(returnUnionType()).concat(1).concat(2)"); - fold( - "[1,2,1].concat(1).concat(returnArrayType()).concat(2)", - "[1,2,1].concat(1).concat(returnArrayType(),2)", - ); - fold( - "[1].concat(1).concat(2).concat(returnArrayType())", - "[1].concat(1,2).concat(returnArrayType())", - ); - fold_same("[].concat(1).concat(returnArrayType())"); + fold("var x, y; [1].concat(x).concat(y)", "var x, y; [1].concat(x, y)"); + fold("var y; [1].concat(x).concat(y)", "var y; [1].concat(x, y)"); // x might have a getter that updates y, but that side effect is preserved correctly + fold("var x; [1].concat(x.a).concat(x)", "var x; [1].concat(x.a, x)"); // x.a might have a getter that 
updates x, but that side effect is preserved correctly + + fold_same("[].concat(1)"); fold_same("obj.concat([1,2]).concat(1)"); } diff --git a/tasks/minsize/minsize.snap b/tasks/minsize/minsize.snap index 073f5b22e4919..0e8af93b80a83 100644 --- a/tasks/minsize/minsize.snap +++ b/tasks/minsize/minsize.snap @@ -19,7 +19,7 @@ Original | minified | minified | gzip | gzip | Fixture 2.14 MB | 725.56 kB | 724.14 kB | 180.06 kB | 181.07 kB | victory.js -3.20 MB | 1.01 MB | 1.01 MB | 332.02 kB | 331.56 kB | echarts.js +3.20 MB | 1.01 MB | 1.01 MB | 332.01 kB | 331.56 kB | echarts.js 6.69 MB | 2.32 MB | 2.31 MB | 492.65 kB | 488.28 kB | antd.js