From 38e4b535f71094c6df119ce1fb7413bffec2fdc8 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sun, 18 Jan 2026 01:52:07 +0000 Subject: [PATCH] fix(minifier): validate RegExp patterns before marking as pure (#18125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Invalid RegExp patterns like `RegExp("[")` or invalid flags like `RegExp("a", "xyz")` throw `SyntaxError` at runtime, but the minifier was incorrectly removing them. This change uses `oxc_regular_expression` to validate patterns at compile time: - Valid patterns are marked as pure and can be removed when unused - Invalid patterns or non-literal arguments are kept (preserving runtime errors) ### Behavior | Input | Valid? | Removed? | |-------|--------|----------| | `RegExp()` | ✅ | ✅ | | `RegExp('a', 'g')` | ✅ | ✅ | | `RegExp(/foo/)` | ✅ | ✅ | | `RegExp('[')` | ❌ | ❌ (throws SyntaxError) | | `RegExp('a', 'xyz')` | ❌ | ❌ (throws SyntaxError) | | `RegExp(variable)` | ? | ❌ (can't determine) | Fixes #18050 ## Test plan - [x] Updated existing tests to reflect correct behavior - [x] Added tests for invalid patterns and flags - [x] `cargo test -p oxc_minifier` passes - [x] `cargo test -p oxc_ecmascript` passes 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- Cargo.lock | 1 + crates/oxc_ecmascript/Cargo.toml | 1 + .../src/side_effects/expressions.rs | 57 +++++++++++++++---- crates/oxc_ecmascript/src/side_effects/mod.rs | 1 + crates/oxc_minifier/src/peephole/normalize.rs | 16 ++++-- .../tests/ecmascript/may_have_side_effects.rs | 13 ++++- .../oxc_minifier/tests/peephole/normalize.rs | 20 +++++-- .../peephole/substitute_alternate_syntax.rs | 19 +++++-- .../snapshots/minifier_node_compat.snap | 4 +- .../allocs_minifier.snap | 2 +- 10 files changed, 100 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7be6ccefe4928..b94514ba278b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1906,6 +1906,7 @@ dependencies = [ "num-traits", "oxc_allocator", "oxc_ast", + "oxc_regular_expression", "oxc_span", "oxc_syntax", ] diff --git a/crates/oxc_ecmascript/Cargo.toml b/crates/oxc_ecmascript/Cargo.toml index 91ca5da7e2bda..64457c2e6a0f6 100644 --- a/crates/oxc_ecmascript/Cargo.toml +++ b/crates/oxc_ecmascript/Cargo.toml @@ -23,6 +23,7 @@ doctest = false [dependencies] oxc_allocator = { workspace = true } oxc_ast = { workspace = true } +oxc_regular_expression = { workspace = true } oxc_span = { workspace = true } oxc_syntax = { workspace = true, features = ["to_js_string"] } diff --git a/crates/oxc_ecmascript/src/side_effects/expressions.rs b/crates/oxc_ecmascript/src/side_effects/expressions.rs index 53eee989f3157..62bc50d9f8ec7 100644 --- a/crates/oxc_ecmascript/src/side_effects/expressions.rs +++ b/crates/oxc_ecmascript/src/side_effects/expressions.rs @@ -221,15 +221,50 @@ impl<'a> MayHaveSideEffects<'a> for BinaryExpression<'a> { } } -fn is_pure_regexp(name: &str, args: &[Argument<'_>]) -> bool { - name == "RegExp" - && match args.len() { - 0 | 1 => true, - 2 => args[1].as_expression().is_some_and(|e| { - matches!(e, Expression::Identifier(_) | Expression::StringLiteral(_)) - }), - _ => false, - } +/// Validate a RegExp constructor call using the regex parser. +/// +/// Returns `true` if the pattern and flags are valid (pure/side-effect free), +/// `false` if invalid or cannot be statically determined. +/// +/// Invalid patterns like `RegExp("[")` or invalid flags like `RegExp("a", "xyz")` throw SyntaxError, +/// so they are NOT pure. +/// +/// See +pub fn is_valid_regexp(args: &[Argument<'_>]) -> bool { + // Extract pattern from first argument + let pattern = match args.first() { + // No arguments: `RegExp()` is valid, returns /(?:)/ + None => "", + Some(arg) => match arg.as_expression() { + // RegExp literal argument: `RegExp(/foo/)` is always valid + Some(Expression::RegExpLiteral(_)) => return true, + // String literal: extract the pattern to validate + Some(Expression::StringLiteral(s)) => s.value.as_str(), + // Non-literal argument: can't statically determine, assume side effects + _ => return false, + }, + }; + + // Extract flags from second argument + let flags = match args.get(1) { + None => None, + Some(arg) => match arg.as_expression() { + Some(Expression::StringLiteral(s)) => Some(s.value.as_str()), + // Non-literal flags: can't statically determine, assume side effects + _ => return false, + }, + }; + + // Use the regex parser to validate the pattern and flags + let allocator = oxc_allocator::Allocator::default(); + oxc_regular_expression::LiteralParser::new( + &allocator, + pattern, + flags, + oxc_regular_expression::Options::default(), + ) + .parse() + .is_ok() } #[rustfmt::skip] @@ -552,7 +587,7 @@ impl<'a> MayHaveSideEffects<'a> for CallExpression<'a> { && let name = ident.name.as_str() && (is_pure_global_function(name) || is_pure_call(name) - || is_pure_regexp(name, &self.arguments)) + || (name == "RegExp" && is_valid_regexp(&self.arguments))) { return self.arguments.iter().any(|e| e.may_have_side_effects(ctx)); } @@ -613,7 +648,7 @@ impl<'a> MayHaveSideEffects<'a> for NewExpression<'a> { if let Expression::Identifier(ident) = &self.callee && ctx.is_global_reference(ident) && let name = ident.name.as_str() - && (is_pure_constructor(name) || is_pure_regexp(name, &self.arguments)) + && (is_pure_constructor(name) || (name == "RegExp" && is_valid_regexp(&self.arguments))) { return self.arguments.iter().any(|e| e.may_have_side_effects(ctx)); } diff --git a/crates/oxc_ecmascript/src/side_effects/mod.rs b/crates/oxc_ecmascript/src/side_effects/mod.rs index 2d800153f201c..f9df3edfb1130 100644 --- a/crates/oxc_ecmascript/src/side_effects/mod.rs +++ b/crates/oxc_ecmascript/src/side_effects/mod.rs @@ -4,6 +4,7 @@ mod pure_function; mod statements; pub use context::{MayHaveSideEffectsContext, PropertyReadSideEffects}; +pub use expressions::is_valid_regexp; pub use pure_function::is_pure_function; /// Returns true if subtree changes application state. diff --git a/crates/oxc_minifier/src/peephole/normalize.rs b/crates/oxc_minifier/src/peephole/normalize.rs index 80295574d0661..a24b1c9fabf58 100644 --- a/crates/oxc_minifier/src/peephole/normalize.rs +++ b/crates/oxc_minifier/src/peephole/normalize.rs @@ -1,6 +1,9 @@ use oxc_allocator::{TakeIn, Vec}; use oxc_ast::ast::*; -use oxc_ecmascript::constant_evaluation::{DetermineValueType, ValueType}; +use oxc_ecmascript::{ + constant_evaluation::{DetermineValueType, ValueType}, + side_effects::is_valid_regexp, +}; use oxc_semantic::IsGlobalReference; use oxc_span::GetSpan; use oxc_syntax::scope::ScopeFlags; @@ -364,9 +367,14 @@ impl<'a> Normalize { ], ), "ArrayBuffer" | "Date" => (false, false, &[ValueType::BigInt]), - "Boolean" | "Error" | "EvalError" | "RangeError" | "ReferenceError" | "RegExp" - | "SyntaxError" | "TypeError" | "URIError" | "Number" | "Object" | "String" => { - (false, false, &[]) + "Boolean" | "Error" | "EvalError" | "RangeError" | "ReferenceError" | "SyntaxError" + | "TypeError" | "URIError" | "Number" | "Object" | "String" => (false, false, &[]), + // RegExp needs special validation using the regex parser + "RegExp" => { + if Self::can_set_pure(ident, &ctx) && is_valid_regexp(&new_expr.arguments) { + new_expr.pure = true; + } + return; } _ => return, }; diff --git a/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs b/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs index 1f4faccbabe53..231fb6dcf58c5 100644 --- a/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs +++ b/crates/oxc_minifier/tests/ecmascript/may_have_side_effects.rs @@ -207,9 +207,14 @@ fn closure_compiler_tests() { test("templateFunction`template`", true); test("st = `${name}template`", true); test("tempFunc = templateFunction`template`", true); - test("new RegExp('foobar', 'i')", false); - test("new RegExp('foobar', 2)", true); - test("new RegExp(SomethingWacky(), 'i')", true); + // RegExp is validated using the regex parser to determine if it's pure. + // Valid patterns are pure, invalid patterns have side effects (throw SyntaxError). + // https://github.com/oxc-project/oxc/issues/18050 + test("new RegExp('foobar', 'i')", false); // Valid pattern and flags + test("new RegExp('foobar', 2)", true); // Non-string flags, can't validate + test("new RegExp(SomethingWacky(), 'i')", true); // Non-literal pattern, can't validate + test("new RegExp('[')", true); // Invalid pattern + test("new RegExp('a', 'xyz')", true); // Invalid flags // test("new Array()", false); // test("new Array", false); // test("new Array(4)", false); @@ -797,6 +802,7 @@ fn test_new_expressions() { test("new EvalError", false); test("new RangeError", false); test("new ReferenceError", false); + // RegExp() with no arguments is valid (returns /(?:)/) test("new RegExp", false); test("new SyntaxError", false); test("new TypeError", false); @@ -822,6 +828,7 @@ fn test_call_expressions() { test("EvalError()", false); test("RangeError()", false); test("ReferenceError()", false); + // RegExp() with no arguments is valid (returns /(?:)/) test("RegExp()", false); test("SyntaxError()", false); test("TypeError()", false); diff --git a/crates/oxc_minifier/tests/peephole/normalize.rs b/crates/oxc_minifier/tests/peephole/normalize.rs index 10a46b6059d89..66f3f7565f840 100644 --- a/crates/oxc_minifier/tests/peephole/normalize.rs +++ b/crates/oxc_minifier/tests/peephole/normalize.rs @@ -79,6 +79,7 @@ fn pure_constructors() { test("new Object", ""); test("new RangeError", ""); test("new ReferenceError", ""); + // RegExp with no arguments is valid (returns /(?:)/) and can be removed test("new RegExp", ""); test("new Set", ""); test("new String", ""); @@ -100,7 +101,8 @@ fn pure_constructors() { test("new Object(null)", ""); test("new RangeError(null)", ""); test("new ReferenceError(null)", ""); - test("new RegExp(null)", ""); + // null is not a string literal, can't statically validate + test("new RegExp(null)", "RegExp(null)"); test("new Set(null)", ""); test("new String(null)", ""); test("new SyntaxError(null)", ""); @@ -121,7 +123,8 @@ fn pure_constructors() { test("new Object(undefined)", ""); test("new RangeError(undefined)", ""); test("new ReferenceError(undefined)", ""); - test("new RegExp(undefined)", ""); + // undefined is not a string literal, can't statically validate + test("new RegExp(undefined)", "RegExp(void 0)"); test("new Set(undefined)", ""); test("new String(undefined)", ""); test("new SyntaxError(undefined)", ""); @@ -142,7 +145,8 @@ fn pure_constructors() { test("new Object(0)", ""); test("new RangeError(0)", ""); test("new ReferenceError(0)", ""); - test("new RegExp(0)", ""); + // 0 is not a string literal, can't statically validate + test("new RegExp(0)", "RegExp(0)"); test_same("new Set(0)"); test("new String(0)", ""); test("new SyntaxError(0)", ""); @@ -163,7 +167,8 @@ fn pure_constructors() { test("new Object(10n)", ""); test("new RangeError(10n)", ""); test("new ReferenceError(10n)", ""); - test("new RegExp(10n)", ""); + // 10n is not a string literal, can't statically validate + test("new RegExp(10n)", "RegExp(10n)"); test_same("new Set(10n)"); test("new String(10n)", ""); test("new SyntaxError(10n)", ""); @@ -184,6 +189,7 @@ fn pure_constructors() { test("new Object('')", ""); test("new RangeError('')", ""); test("new ReferenceError('')", ""); + // Empty string is a valid pattern (matches everything) test("new RegExp('')", ""); test("new Set('')", ""); test("new String('')", ""); @@ -205,7 +211,8 @@ fn pure_constructors() { test("new Object(!0)", ""); test("new RangeError(!0)", ""); test("new ReferenceError(!0)", ""); - test("new RegExp(!0)", ""); + // !0 is not a string literal, can't statically validate + test("new RegExp(!0)", "RegExp(!0)"); test_same("new Set(!0)"); test("new String(!0)", ""); test("new SyntaxError(!0)", ""); @@ -226,7 +233,8 @@ fn pure_constructors() { test("new Object([])", ""); test("new RangeError([])", ""); test("new ReferenceError([])", ""); - test("new RegExp([])", ""); + // Array arguments are object type, so conversion doesn't happen + test_same("new RegExp([])"); test("new Set([])", ""); test("new String([])", ""); test("new SyntaxError([])", ""); diff --git a/crates/oxc_minifier/tests/peephole/substitute_alternate_syntax.rs b/crates/oxc_minifier/tests/peephole/substitute_alternate_syntax.rs index c856e3ab302fa..f7bfaccea0a0b 100644 --- a/crates/oxc_minifier/tests/peephole/substitute_alternate_syntax.rs +++ b/crates/oxc_minifier/tests/peephole/substitute_alternate_syntax.rs @@ -228,13 +228,20 @@ fn test_fold_new_expressions() { test("new Function('a', 'b', 'console.log(a, b)')", "Function('a', 'b', 'console.log(a, b)')"); test_same("var Function; new Function()"); - test("new RegExp()", ""); - test("new RegExp('a')", ""); - test("new RegExp(0)", ""); - test("new RegExp(null)", ""); - test("x = new RegExp('a', 'g')", "x = RegExp('a', 'g')"); - test_same("new RegExp(foo)"); + // RegExp is validated using the regex parser to determine if it's pure. + // Valid patterns can be removed, invalid patterns must be kept. + // https://github.com/oxc-project/oxc/issues/18050 + test("new RegExp()", ""); // Valid: empty pattern + test("new RegExp('a')", ""); // Valid: simple pattern + test("new RegExp(0)", "RegExp(0)"); // Can't validate non-string literal + test("new RegExp(null)", "RegExp(null)"); // Can't validate non-string literal + test("x = new RegExp('a', 'g')", "x = /* @__PURE__ */ RegExp('a', 'g')"); // Valid pattern, marked as pure + test_same("new RegExp(foo)"); // Can't validate variable + // RegExp literal is always valid test("new RegExp(/foo/)", ""); + // Invalid patterns and flags must not be removed (they throw SyntaxError at runtime) + test_same("RegExp('[')"); + test_same("RegExp('a', 'xyz')"); } #[test] diff --git a/tasks/coverage/snapshots/minifier_node_compat.snap b/tasks/coverage/snapshots/minifier_node_compat.snap index ab4753712a2c1..2a79eab5c1049 100644 --- a/tasks/coverage/snapshots/minifier_node_compat.snap +++ b/tasks/coverage/snapshots/minifier_node_compat.snap @@ -2,13 +2,11 @@ commit: 499beb6f minifier_node_compat Summary: AST Parsed : 938/938 (100.00%) -Positive Passed: 931/938 (99.25%) +Positive Passed: 932/938 (99.36%) execution_result: tasks/coverage/ES2015/built-ins›Proxy›"getOwnPropertyDescriptor" handler invariants execution_result: tasks/coverage/ES2015/misc›Proxy, internal 'get' calls›HasBinding -execution_result: tasks/coverage/ES2015/misc›Proxy, internal 'get' calls›RegExp constructor - execution_result: tasks/coverage/ES2015/misc›Proxy, internal 'get' calls›String.raw execution_result: tasks/coverage/ES2015/misc›Proxy, internal 'ownKeys' calls›TestIntegrityLevel diff --git a/tasks/track_memory_allocations/allocs_minifier.snap b/tasks/track_memory_allocations/allocs_minifier.snap index 2f33000316ad0..ceabd4e16775e 100644 --- a/tasks/track_memory_allocations/allocs_minifier.snap +++ b/tasks/track_memory_allocations/allocs_minifier.snap @@ -8,7 +8,7 @@ RadixUIAdoptionSection.jsx | 2.52 kB || 65 | 3 | pdf.mjs | 567.30 kB || 4691 | 569 || 47464 | 7730 -antd.js | 6.69 MB || 10723 | 2505 || 331648 | 69358 +antd.js | 6.69 MB || 10732 | 2514 || 331644 | 69358 binder.ts | 193.08 kB || 431 | 119 || 7075 | 824