From 9184911eacc1c64df96ad6cc3b2a60e9f618d20c Mon Sep 17 00:00:00 2001 From: sapphi-red <49056869+sapphi-red@users.noreply.github.com> Date: Sun, 31 Aug 2025 15:15:49 +0000 Subject: [PATCH] fix(regular_expression): detect usage of unsupported syntax recursively (#13470) unsupported syntax inside groups and others were not detected properly. --- .../src/ast_impl/support.rs | 71 ++++++++++++------- .../input.js | 9 ++- .../output.js | 8 ++- .../input.js | 6 +- .../output.js | 6 +- 5 files changed, 66 insertions(+), 34 deletions(-) diff --git a/crates/oxc_regular_expression/src/ast_impl/support.rs b/crates/oxc_regular_expression/src/ast_impl/support.rs index 12a5daa15204e..2f84d3117af44 100644 --- a/crates/oxc_regular_expression/src/ast_impl/support.rs +++ b/crates/oxc_regular_expression/src/ast_impl/support.rs @@ -1,4 +1,7 @@ -use crate::ast::{CharacterClass, CharacterClassContents, LookAroundAssertionKind, Pattern, Term}; +use crate::ast::{ + Alternative, CharacterClass, CharacterClassContents, Disjunction, LookAroundAssertionKind, + Pattern, Term, +}; pub struct RegexUnsupportedPatterns { pub named_capture_groups: bool, @@ -13,34 +16,54 @@ pub fn has_unsupported_regular_expression_pattern( pattern: &Pattern, unsupported: &RegexUnsupportedPatterns, ) -> bool { - pattern.body.body.iter().any(|alternative| { - alternative.body.iter().any(|term| term_contains_unsupported(term, unsupported)) - }) + disjunction_contains_unsupported(&pattern.body, unsupported) } -fn term_contains_unsupported(mut term: &Term, unsupported: &RegexUnsupportedPatterns) -> bool { - // Loop because `Term::Quantifier` contains a nested `Term` - loop { - match term { - Term::CapturingGroup(group) => { - return group.name.is_some() && unsupported.named_capture_groups; - } - Term::UnicodePropertyEscape(_) => return unsupported.unicode_property_escapes, - Term::CharacterClass(character_class) => { - return unsupported.unicode_property_escapes - && character_class_has_unicode_property_escape(character_class); +fn disjunction_contains_unsupported( + disjunction: &Disjunction, + unsupported: &RegexUnsupportedPatterns, +) -> bool { + disjunction + .body + .iter() + .any(|alternative| alternative_contains_unsupported(alternative, unsupported)) +} + +fn alternative_contains_unsupported( + alternative: &Alternative, + unsupported: &RegexUnsupportedPatterns, +) -> bool { + alternative.body.iter().any(|term| term_contains_unsupported(term, unsupported)) +} + +fn term_contains_unsupported(term: &Term, unsupported: &RegexUnsupportedPatterns) -> bool { + match term { + Term::LookAroundAssertion(assertion) => { + if unsupported.look_behind_assertions + && matches!( + assertion.kind, + LookAroundAssertionKind::Lookbehind + | LookAroundAssertionKind::NegativeLookbehind + ) + { + return true; } - Term::LookAroundAssertion(assertion) => { - return unsupported.look_behind_assertions - && matches!( - assertion.kind, - LookAroundAssertionKind::Lookbehind - | LookAroundAssertionKind::NegativeLookbehind - ); + disjunction_contains_unsupported(&assertion.body, unsupported) + } + Term::Quantifier(quantifier) => term_contains_unsupported(&quantifier.body, unsupported), + Term::UnicodePropertyEscape(_) => unsupported.unicode_property_escapes, + Term::CharacterClass(character_class) => { + unsupported.unicode_property_escapes + && character_class_has_unicode_property_escape(character_class) + } + Term::CapturingGroup(group) => { + if group.name.is_some() && unsupported.named_capture_groups { + return true; } - Term::Quantifier(quantifier) => term = &quantifier.body, - _ => return false, + disjunction_contains_unsupported(&group.body, unsupported) } + Term::IgnoreGroup(group) => disjunction_contains_unsupported(&group.body, unsupported), + _ => false, } } diff --git a/tasks/transform_conformance/tests/regexp/test/fixtures/all-regex-plugins-enabled-by-targets/input.js b/tasks/transform_conformance/tests/regexp/test/fixtures/all-regex-plugins-enabled-by-targets/input.js index 40bed4b16c673..ead4ff94616eb 100644 --- a/tasks/transform_conformance/tests/regexp/test/fixtures/all-regex-plugins-enabled-by-targets/input.js +++ b/tasks/transform_conformance/tests/regexp/test/fixtures/all-regex-plugins-enabled-by-targets/input.js @@ -9,11 +9,11 @@ a1 = /a.b/s // RegExpLookbehindAssertions b1 = /(?b)/ -c2 = /((?d)){4}/; // FIXME(sapphi-red): will be fixed in the next PR +c2 = /((?d)){4}/ // RegExpUnicodePropertyEscapes d1 = /\p{Emoji}/u // ES2022 @@ -22,3 +22,6 @@ f1 = /y/d // ES2024 // RegExpSetNotation g1 = /[\p{White_Space}&&\p{ASCII}]/v + +// Nested cases +nested1 = /(?:(?b)", ""); -c2 = /((?d)){4}/; +c2 = new RegExp("((?d)){4}", ""); d1 = new RegExp("\\p{Emoji}", "u"); f1 = new RegExp("y", "d"); g1 = new RegExp("[\\p{White_Space}&&\\p{ASCII}]", "v"); + +nested1 = new RegExp("(?:(?b)/ -c2 = /((?b)){2}/ // FIXME(sapphi-red): will be fixed in the next PR +c2 = /((?b)){2}/ -n1 = /(a)/ +nested1 = /(?b))/ +nested2 = /((?b))/ +nested3 = /(?:(?b))/ diff --git a/tasks/transform_conformance/tests/regexp/test/fixtures/transform-named-capturing-groups-regex/output.js b/tasks/transform_conformance/tests/regexp/test/fixtures/transform-named-capturing-groups-regex/output.js index c179c37555ea1..954036cb5c14d 100644 --- a/tasks/transform_conformance/tests/regexp/test/fixtures/transform-named-capturing-groups-regex/output.js +++ b/tasks/transform_conformance/tests/regexp/test/fixtures/transform-named-capturing-groups-regex/output.js @@ -1,4 +1,6 @@ c1 = new RegExp("(?b)", ""); -c2 = /((?b)){2}/; +c2 = new RegExp("((?b)){2}", ""); -n1 = /(a)/; +nested1 = new RegExp("(?b))", ""); +nested2 = new RegExp("((?b))", ""); +nested3 = new RegExp("(?:(?b))", "");