diff --git a/crates/oxc_ecmascript/src/side_effects/mod.rs b/crates/oxc_ecmascript/src/side_effects/mod.rs index 0ef20bb469810..2d800153f201c 100644 --- a/crates/oxc_ecmascript/src/side_effects/mod.rs +++ b/crates/oxc_ecmascript/src/side_effects/mod.rs @@ -1,8 +1,10 @@ mod context; mod expressions; +mod pure_function; mod statements; pub use context::{MayHaveSideEffectsContext, PropertyReadSideEffects}; +pub use pure_function::is_pure_function; /// Returns true if subtree changes application state. /// diff --git a/crates/oxc_ecmascript/src/side_effects/pure_function.rs b/crates/oxc_ecmascript/src/side_effects/pure_function.rs new file mode 100644 index 0000000000000..b56156b03ddf6 --- /dev/null +++ b/crates/oxc_ecmascript/src/side_effects/pure_function.rs @@ -0,0 +1,104 @@ +use oxc_ast::ast::{ChainElement, Expression}; + +/// Check if the callee is a pure function based on a list of pure function names. +/// +/// This handles: +/// - Simple identifiers: `foo()` matches `["foo"]` +/// - Member expressions: `console.log()` matches `["console"]` or `["console.log"]` +/// - Chained calls: `styled()()` or `styled().div()` matches `["styled"]` +/// - Optional chaining: `styled?.div()` or `console?.log()` matches `["styled"]` or `["console.log"]` +/// +/// Besides any functions matching that name, any properties on a pure function +/// and any functions returned from a pure function will also be considered pure. +/// For example, if `["console.log"]` is specified: +/// - `console.log()` is pure +/// - `console.log.foo()` is pure (property on pure function) +/// - `console.log()()` is pure (function returned from pure function) +/// - `console.log().foo()` is pure (property on returned function) +pub fn is_pure_function(callee: &Expression, pure_functions: &[String]) -> bool { + if pure_functions.is_empty() { + return false; + } + let Some(path_parts) = extract_callee_path(callee) else { + return false; + }; + pure_functions.iter().any(|pure_fn| is_path_match(&path_parts, pure_fn)) +} + +/// Extract the path parts from a callee expression. +/// Returns None if the callee cannot be matched (e.g., contains non-string computed properties). +/// The returned Vec contains path parts in reverse order (from outermost to root). +fn extract_callee_path<'a>(callee: &'a Expression<'a>) -> Option> { + let mut path_parts: Vec<&str> = Vec::new(); + let mut current = callee; + + loop { + match current { + Expression::Identifier(ident) => { + path_parts.push(ident.name.as_str()); + break; + } + Expression::StaticMemberExpression(member) => { + path_parts.push(member.property.name.as_str()); + current = &member.object; + } + Expression::ComputedMemberExpression(member) => { + let Expression::StringLiteral(lit) = &member.expression else { + return None; + }; + path_parts.push(lit.value.as_str()); + current = &member.object; + } + Expression::CallExpression(call) => { + // Call expressions don't add to the path, they just pass through + // But they do "seal" the previous path - anything before a call is an extension + path_parts.clear(); + current = &call.callee; + } + Expression::ChainExpression(chain) => match &chain.expression { + ChainElement::StaticMemberExpression(member) => { + path_parts.push(member.property.name.as_str()); + current = &member.object; + } + ChainElement::ComputedMemberExpression(member) => { + let Expression::StringLiteral(lit) = &member.expression else { + return None; + }; + path_parts.push(lit.value.as_str()); + current = &member.object; + } + ChainElement::CallExpression(call) => { + // Call expressions don't add to the path, they just pass through + // But they do "seal" the previous path - anything before a call is an extension + path_parts.clear(); + current = &call.callee; + } + ChainElement::TSNonNullExpression(ts) => { + current = &ts.expression; + } + ChainElement::PrivateFieldExpression(_) => { + return None; + } + }, + Expression::ParenthesizedExpression(paren) => { + current = &paren.expression; + } + _ => { + return None; + } + } + } + + Some(path_parts) +} + +/// Check if the extracted path matches the given pure function name. +/// The pure function name can be a dotted path like "console.log". +/// Returns true if the pure function name is a prefix of the callee's path. +fn is_path_match(path_parts: &[&str], pure_fn: &str) -> bool { + let pure_parts_count = pure_fn.bytes().filter(|&b| b == b'.').count() + 1; + if pure_parts_count > path_parts.len() { + return false; + } + pure_fn.split('.').zip(path_parts.iter().rev()).all(|(a, b)| a == *b) +} diff --git a/crates/oxc_minifier/src/ctx.rs b/crates/oxc_minifier/src/ctx.rs index c31f9b2521530..6e8a657d5e6b8 100644 --- a/crates/oxc_minifier/src/ctx.rs +++ b/crates/oxc_minifier/src/ctx.rs @@ -5,7 +5,7 @@ use oxc_ecmascript::{ constant_evaluation::{ ConstantEvaluation, ConstantEvaluationCtx, ConstantValue, binary_operation_evaluate_value, }, - side_effects::{MayHaveSideEffects, PropertyReadSideEffects}, + side_effects::{MayHaveSideEffects, PropertyReadSideEffects, is_pure_function}, }; use oxc_semantic::{IsGlobalReference, Scoping, SymbolId}; use oxc_span::format_atom; @@ -67,16 +67,11 @@ impl<'a> oxc_ecmascript::side_effects::MayHaveSideEffectsContext<'a> for Ctx<'a, } fn manual_pure_functions(&self, callee: &Expression) -> bool { - if let Expression::Identifier(ident) = callee { - return self - .state - .options - .treeshake - .manual_pure_functions - .iter() - .any(|name| ident.name.as_str() == name); + let pure_functions = &self.state.options.treeshake.manual_pure_functions; + if pure_functions.is_empty() { + return false; } - false + is_pure_function(callee, pure_functions) } fn property_read_side_effects(&self) -> PropertyReadSideEffects { diff --git a/crates/oxc_minifier/tests/ecmascript/is_pure_function.rs b/crates/oxc_minifier/tests/ecmascript/is_pure_function.rs new file mode 100644 index 0000000000000..e7f43cb4416cd --- /dev/null +++ b/crates/oxc_minifier/tests/ecmascript/is_pure_function.rs @@ -0,0 +1,164 @@ +use oxc_allocator::Allocator; +use oxc_ast::ast::{ChainElement, Expression, Statement}; +use oxc_ecmascript::side_effects::is_pure_function; +use oxc_parser::Parser; +use oxc_span::SourceType; + +#[track_caller] +fn test(source: &str, pure_functions: &[&str], expected: bool) { + let allocator = Allocator::default(); + let ret = Parser::new(&allocator, source, SourceType::mjs()).parse(); + assert!(!ret.panicked, "{source}"); + assert!(ret.errors.is_empty(), "{source}"); + + let Some(Statement::ExpressionStatement(stmt)) = ret.program.body.first() else { + panic!("should have an expression statement body: {source}"); + }; + let callee = get_callee(stmt.expression.without_parentheses(), source); + + let pure_fns: Vec = pure_functions.iter().map(ToString::to_string).collect(); + assert_eq!( + is_pure_function(callee, &pure_fns), + expected, + "{source} with pure_functions={pure_functions:?}" + ); +} + +fn get_callee<'a>(expr: &'a Expression<'a>, source: &str) -> &'a Expression<'a> { + match expr { + Expression::CallExpression(call) => &call.callee, + Expression::NewExpression(new_expr) => &new_expr.callee, + Expression::TaggedTemplateExpression(tagged) => &tagged.tag, + Expression::ChainExpression(chain) => match &chain.expression { + ChainElement::CallExpression(call) => &call.callee, + _ => panic!("should have a call expression inside chain: {source}"), + }, + _ => panic!("should have a call, new, or tagged template expression: {source}"), + } +} + +#[test] +fn test_simple_identifiers() { + test("foo()", &["foo"], true); + test("bar()", &["foo"], false); + test("Foo()", &["foo"], false); + test("foo()", &["Foo"], false); + test("Foo()", &["Foo"], true); + test("foo()", &["foo", "bar"], true); + test("bar()", &["foo", "bar"], true); + test("baz()", &["foo", "bar"], false); +} + +#[test] +fn test_member_expressions() { + test("console.log()", &["console"], true); + test("console['log']()", &["console"], true); + test("console.warn()", &["console"], true); + test("other.log()", &["console"], false); + test("other.console.log()", &["console"], false); + test("other['console'].log()", &["console"], false); + test("console.log()", &["console.log"], true); + test("console['log']()", &["console.log"], true); + test("console.warn()", &["console.log"], false); + test("console['warn']()", &["console.log"], false); + test("console.console.console()", &["console"], true); + test("console.foo.console()", &["console"], true); + test("console.other.log()", &["console.log"], false); + + test("a.b.c()", &["a"], true); + test("a.b.c()", &["a.b"], true); + test("a.b.c()", &["a.b.c"], true); + test("a['b'].c()", &["a.b.c"], true); + test("a.b['c']()", &["a.b.c"], true); + test("a['b']['c']()", &["a.b.c"], true); + test("a.b.d()", &["a.b.c"], false); + test("a.b()", &["a.b.c"], false); +} + +#[test] +fn test_chained_calls() { + test("styled()()", &["styled"], true); + test("styled()('div')", &["styled"], true); + test("other()()", &["styled"], false); + test("console.log()()", &["console.log"], true); + test("console.log().foo()", &["console.log"], true); + + test("styled().div()", &["styled"], true); + test("styled().button()", &["styled"], true); + test("other().div()", &["styled"], false); + test("console.log.foo()", &["console.log"], true); + test("console.log.bar.baz()", &["console.log"], true); + + test("a()()()", &["a"], true); + test("a()().b()", &["a"], true); + test("a().b().c()", &["a"], true); +} + +#[test] +fn test_optional_chaining() { + test("styled?.()", &["styled"], true); + test("other?.()", &["styled"], false); + test("styled?.div()", &["styled"], true); + test("console?.log()", &["console"], true); + test("console?.log()", &["console.log"], true); + test("console?.warn()", &["console.log"], false); +} + +#[test] +fn test_new_expressions() { + test("new Foo()", &["Foo"], true); + test("new Bar()", &["Foo"], false); + + test("new styled.div()", &["styled"], true); + test("new styled.div()", &["styled.div"], true); + test("new styled.button()", &["styled.div"], false); +} + +#[test] +fn test_tagged_template_expressions() { + test("foo``", &["foo"], true); + test("bar``", &["foo"], false); + + test("styled.div``", &["styled"], true); + test("other.div``", &["styled"], false); + test("styled.div``", &["styled.div"], true); +} + +#[test] +fn test_edge_cases() { + test("foo()", &[], false); + test("console.log()", &[], false); + test("styled()()", &[], false); + + test("(foo)()", &["foo"], true); + test("(bar)()", &["foo"], false); + test("((foo))()", &["foo"], true); + test("(((console.log)))()", &["console.log"], true); + + test("foo()", &[""], false); // should not match anything + test("console.log()", &["console."], false); // should not match anything + test("console()", &["console."], false); // should not match anything + test("console.log()", &["."], false); // should not match anything +} + +/// Based on https://github.com/rollup/rollup/blob/v4.53.3/test/form/samples/manual-pure-functions +#[test] +fn test_rollup_manual_pure_functions() { + test("foo()", &["foo", "bar.a"], true); + test("foo.a()", &["foo", "bar.a"], true); + test("foo.a()()", &["foo", "bar.a"], true); + test("foo.a().a()", &["foo", "bar.a"], true); + test("foo.a().a()()", &["foo", "bar.a"], true); + test("foo.a().a().a()", &["foo", "bar.a"], true); + + test("bar()", &["foo", "bar.a"], false); + test("bar.b()", &["foo", "bar.a"], false); + + test("bar.a()", &["foo", "bar.a"], true); + test("bar?.a()", &["foo", "bar.a"], true); + test("bar.a.a()", &["foo", "bar.a"], true); + test("bar.a()()", &["foo", "bar.a"], true); + test("bar.a().a()", &["foo", "bar.a"], true); + test("bar.a()()()", &["foo", "bar.a"], true); + test("bar.a()().a()", &["foo", "bar.a"], true); +} diff --git a/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs b/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs index 86df2b3d15e02..8fa95a9ffc932 100644 --- a/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs +++ b/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs @@ -6,7 +6,9 @@ use oxc_allocator::Allocator; use oxc_ast::ast::{Expression, IdentifierReference, Statement}; use oxc_ecmascript::{ GlobalContext, - side_effects::{MayHaveSideEffects, MayHaveSideEffectsContext, PropertyReadSideEffects}, + side_effects::{ + MayHaveSideEffects, MayHaveSideEffectsContext, PropertyReadSideEffects, is_pure_function, + }, }; use oxc_parser::Parser; use oxc_span::SourceType; @@ -47,11 +49,7 @@ impl MayHaveSideEffectsContext<'_> for Ctx { } fn manual_pure_functions(&self, callee: &Expression) -> bool { - if let Expression::Identifier(id) = callee { - self.pure_function_names.iter().any(|name| name == id.name.as_str()) - } else { - false - } + is_pure_function(callee, &self.pure_function_names) } fn property_read_side_effects(&self) -> PropertyReadSideEffects { @@ -989,6 +987,19 @@ fn test_is_pure_call_support() { test_with_ctx("bar``", &ctx, true); } +#[test] +fn test_manual_pure_functions_with_dotted_names() { + let ctx = Ctx { pure_function_names: vec!["console".to_string()], ..Default::default() }; + test_with_ctx("console()", &ctx, false); + test_with_ctx("console.log()", &ctx, false); + test_with_ctx("console.log(bar())", &ctx, true); + test_with_ctx("other.log()", &ctx, true); + let ctx = Ctx { pure_function_names: vec!["console.log".to_string()], ..Default::default() }; + test_with_ctx("console.log()", &ctx, false); + test_with_ctx("console.warn()", &ctx, true); + test_with_ctx("console.log.foo()", &ctx, false); +} + #[test] fn test_property_read_side_effects_support() { let all_ctx = diff --git a/crates/oxc_minifier/tests/ecmascript/mod.rs b/crates/oxc_minifier/tests/ecmascript/mod.rs index 6f75192cd8458..4465e233382cb 100644 --- a/crates/oxc_minifier/tests/ecmascript/mod.rs +++ b/crates/oxc_minifier/tests/ecmascript/mod.rs @@ -1,6 +1,7 @@ mod array_join; mod is_int32_or_uint32; mod is_literal_value; +mod is_pure_function; mod may_have_side_effects; mod may_have_side_effects_statements; mod prop_name; diff --git a/crates/oxc_minifier/tests/peephole/manual_pure_functions.rs b/crates/oxc_minifier/tests/peephole/manual_pure_functions.rs new file mode 100644 index 0000000000000..2bc255f2ed2de --- /dev/null +++ b/crates/oxc_minifier/tests/peephole/manual_pure_functions.rs @@ -0,0 +1,359 @@ +use oxc_minifier::{CompressOptions, CompressOptionsUnused, TreeShakeOptions}; + +use crate::{default_options, test_options}; + +#[track_caller] +fn test(source_text: &str, expected: &str, pure_functions: &[&str]) { + let options = CompressOptions { + treeshake: TreeShakeOptions { + manual_pure_functions: pure_functions.iter().map(ToString::to_string).collect(), + ..TreeShakeOptions::default() + }, + unused: CompressOptionsUnused::Remove, + ..default_options() + }; + test_options(source_text, expected, &options); +} + +// Tests from Terser pure_funcs.js +// https://github.com/terser/terser/blob/v5.44.1/test/compress/pure_funcs.js +mod terser_tests { + use super::test; + + #[test] + #[ignore = "FIXME"] + fn array() { + test( + " + var a; + export function f(b) { + Math.floor(a / b); + Math.floor(c / b); + } + ", + "export function f(b) {}", + &["Math.floor"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn side_effects() { + test( + " + export function f(a, b) { + console.log(a()); + console.log(b); + } + ", + " + export function f(a, b) { + a(); + } + ", + &["console.log"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn unused() { + test( + " + export function foo() { + var u = pure(1); + var x = pure(2); + var y = pure(x); + var z = pure(pure(side_effects())); + return pure(3); + } + ", + " + export function foo() { + side_effects(); + return pure(3); + } + ", + &["pure"], + ); + } + + #[test] + fn babel() { + test( + r#" + function _classCallCheck(instance, Constructor) { + if (!(instance instanceof Constructor)) + throw new TypeError("Cannot call a class as a function"); + } + export var Foo = function Foo() { + _classCallCheck(this, Foo); + }; + "#, + r" + export var Foo = function() {}; + ", + &["_classCallCheck"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn conditional() { + test( + " + pure(1 | a() ? 2 & b() : 7 ^ c()); + pure(1 | a() ? 2 & b() : 5); + pure(1 | a() ? 4 : 7 ^ c()); + pure(1 | a() ? 4 : 5); + pure(3 ? 2 & b() : 7 ^ c()); + pure(3 ? 2 & b() : 5); + pure(3 ? 4 : 7 ^ c()); + pure(3 ? 4 : 5); + ", + " + 1 | a() ? b() : c(), + 1 | a() && b(), + 1 | a() || c(), + a(), + 3 ? b() : c(), + 3 && b(), + 3 || c() + ", + &["pure"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn relational() { + test( + r#" + foo() in foo(); + foo() instanceof bar(); + foo() < "bar"; + bar() > foo(); + bar() != bar(); + bar() !== "bar"; + "bar" == foo(); + "bar" === bar(); + "bar" >= "bar"; + "#, + " + bar(), + bar(), + bar(), bar(), + bar(), + bar() + ", + &["foo"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn arithmetic() { + test( + r#" + foo() + foo(); + foo() - bar(); + foo() * "bar"; + bar() / foo(); + bar() & bar(); + bar() | "bar"; + "bar" >> foo(); + "bar" << bar(); + "bar" >>> "bar"; + "#, + " + bar(), + bar(), + bar(), bar(), + bar(), + bar() + ", + &["foo"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn boolean_and() { + // Test logical AND with pure function calls + test( + r#" + foo() && foo(); + foo() && bar(); + foo() && "bar"; + bar() && foo(); + bar() && bar(); + bar() && "bar"; + "bar" && foo(); + "bar" && bar(); + "bar" && "bar"; + "#, + r#" + foo() && bar(), + bar(), + bar() && bar(), + bar(), + "bar" && bar() + "#, + &["foo"], + ); + } + + #[test] + fn boolean_or() { + // Test logical OR with pure function calls + test( + r#" + foo() || foo(); + foo() || bar(); + foo() || "bar"; + bar() || foo(); + bar() || bar(); + bar() || "bar"; + "bar" || foo(); + "bar" || bar(); + "bar" || "bar"; + "#, + r" + foo() || bar(), + bar(), + bar() || bar(), + bar() + ", + &["foo"], + ); + } + + #[test] + fn assign() { + test( + " + var a; + export function f(b) { + a = foo(); + b *= 4 + foo(); + c >>= 0 | foo(); + } + ", + " + export function f(b) { + b *= 4 + foo(), c >>= 0 | foo(); + } + ", + &["foo"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn unary() { + test( + r#" + typeof foo(); + typeof bar(); + typeof "bar"; + void foo(); + void bar(); + void "bar"; + delete a[foo()]; + delete a[bar()]; + delete a["bar"]; + a[foo()]++; + a[bar()]++; + a["bar"]++; + --a[foo()]; + --a[bar()]; + --a["bar"]; + ~foo(); + ~bar(); + ~"bar"; + "#, + " + bar(), + bar(), + delete a[foo()], + delete a[bar()], + delete a.bar, + a[foo()]++, + a[bar()]++, + a.bar++, + --a[foo()], + --a[bar()], + --a.bar, + bar() + ", + &["foo"], + ); + } + + #[test] + fn issue_3065_1() { + test( + " + function modifyWrapper(a, f, wrapper) { + wrapper.a = a; + wrapper.f = f; + return wrapper; + } + function pureFunc(fun) { + return modifyWrapper(1, fun, function(a) { + return fun(a); + }); + } + var unused = pureFunc(function(x) { + return x; + }); + ", + "", + &["pureFunc"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn issue_3065_3() { + test( + r#" + function debug(msg) { + console.log(msg); + } + debug(function() { + console.log("PASS"); + return "FAIL"; + }()); + "#, + r#" + (function() { + console.log("PASS"); + })(); + "#, + &["debug"], + ); + } + + #[test] + #[ignore = "FIXME"] + fn issue_3065_4() { + test( + r#" + var debug = function(msg) { + console.log(msg); + }; + debug(function() { + console.log("PASS"); + return "FAIL"; + }()); + "#, + r#" + (function() { + console.log("PASS"); + })(); + "#, + &["debug"], + ); + } +} diff --git a/crates/oxc_minifier/tests/peephole/mod.rs b/crates/oxc_minifier/tests/peephole/mod.rs index 43de31ddf2e50..ac71f7b33ae6a 100644 --- a/crates/oxc_minifier/tests/peephole/mod.rs +++ b/crates/oxc_minifier/tests/peephole/mod.rs @@ -2,6 +2,7 @@ mod collapse_variable_declarations; mod dead_code_elimination; mod esbuild; mod inline_single_use_variable; +mod manual_pure_functions; mod merge_assignments_to_declarations; mod minimize_exit_points; mod obscure_edge_cases;