From a9a8e2ad414e4bd5298c54f624321083159168f3 Mon Sep 17 00:00:00 2001 From: camchenry <1514176+camchenry@users.noreply.github.com> Date: Sun, 22 Sep 2024 04:33:46 +0000 Subject: [PATCH] refactor(linter): use regex parser in `eslint/no-regex-spaces` (#5952) - part of https://github.com/oxc-project/oxc/issues/5416 Use the `oxc_regular_expression` parser to make these checks more robust. a few snapshots are updated because they now output more accurate diagnostics based on the regex AST. for example, `/ ?/` now correctly only highlights two spaces rather than three (because the last one is part of a quantifier) --- .../src/rules/eslint/no_regex_spaces.rs | 176 ++++++++++-------- .../src/snapshots/no_regex_spaces.snap | 19 +- 2 files changed, 111 insertions(+), 84 deletions(-) diff --git a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs index f38e62e3c4612..ccc5f0592a39b 100644 --- a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs +++ b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs @@ -1,10 +1,16 @@ -use oxc_allocator::Vec; +use aho_corasick::AhoCorasick; +use lazy_static::lazy_static; +use oxc_allocator::{Allocator, Vec}; use oxc_ast::{ ast::{Argument, CallExpression, NewExpression, RegExpLiteral}, AstKind, }; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; +use oxc_regular_expression::{ + ast::{Alternative, Disjunction, Pattern, Term}, + Parser, ParserOptions, +}; use oxc_span::Span; use crate::{context::LintContext, rule::Rule, AstNode}; @@ -38,8 +44,14 @@ declare_oxc_lint!( /// ``` NoRegexSpaces, restriction, + pending // TODO: This is somewhat autofixable, but the fixer does not exist yet. ); +lazy_static! { + static ref DOUBLE_SPACE: AhoCorasick = + AhoCorasick::new([" "]).expect("no-regex-spaces: Unable to build AhoCorasick"); +} + impl Rule for NoRegexSpaces { fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) { match node.kind() { @@ -70,18 +82,12 @@ impl NoRegexSpaces { fn find_literal_to_report(literal: &RegExpLiteral, ctx: &LintContext) -> Option { let pattern_text = literal.regex.pattern.source_text(ctx.source_text()); let pattern_text = pattern_text.as_ref(); - if Self::has_exempted_char_class(pattern_text) { + if !Self::has_double_space(pattern_text) { return None; } - if let Some((idx_start, idx_end)) = Self::find_consecutive_spaces_indices(pattern_text) { - let start = literal.span.start + u32::try_from(idx_start).unwrap() + 1; - let end = literal.span.start + u32::try_from(idx_end).unwrap() + 2; - - return Some(Span::new(start, end)); - } - - None + let pattern = literal.regex.pattern.as_pattern()?; + Self::find_consecutive_spaces(pattern) } fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option { @@ -91,72 +97,60 @@ impl NoRegexSpaces { } } - if let Some(Argument::StringLiteral(pattern)) = args.first() { - if Self::has_exempted_char_class(&pattern.value) { - return None; // skip spaces inside char class, e.g. RegExp('[ ]') - } - - if let Some((idx_start, idx_end)) = - Self::find_consecutive_spaces_indices(&pattern.value) - { - let start = pattern.span.start + u32::try_from(idx_start).unwrap() + 1; - let end = pattern.span.start + u32::try_from(idx_end).unwrap() + 2; - - return Some(Span::new(start, end)); - } + let Some(Argument::StringLiteral(pattern)) = args.first() else { + return None; + }; + if !Self::has_double_space(&pattern.value) { + return None; } - None - } + let alloc = Allocator::default(); + let pattern_with_slashes = format!("/{}/", &pattern.value); + let parser = Parser::new(&alloc, pattern_with_slashes.as_str(), ParserOptions::default()); + let regex = parser.parse().ok()?; - fn find_consecutive_spaces_indices(input: &str) -> Option<(usize, usize)> { - let mut consecutive_spaces = 0; - let mut start: Option = None; - let mut inside_square_brackets: usize = 0; - - for (cur_idx, char) in input.char_indices() { - if char == '[' { - inside_square_brackets += 1; - } else if char == ']' { - inside_square_brackets = inside_square_brackets.saturating_sub(1); - } + Self::find_consecutive_spaces(®ex.pattern) + .map(|span| Span::new(span.start + pattern.span.start, span.end + pattern.span.start)) + } - if char != ' ' || inside_square_brackets > 0 { - // ignore spaces inside char class, including nested ones - consecutive_spaces = 0; - start = None; - continue; + fn find_consecutive_spaces(pattern: &Pattern) -> Option { + let mut last_space_span: Option = None; + let mut in_quantifier = false; + visit_terms(pattern, &mut |term| { + if let Term::Quantifier(_) = term { + in_quantifier = true; + return; } - - if start.is_none() { - start = Some(cur_idx); + let Term::Character(ch) = term else { + return; + }; + if in_quantifier { + in_quantifier = false; + return; } - - consecutive_spaces += 1; - - if consecutive_spaces < 2 { - continue; + if ch.value != u32::from(b' ') { + return; } - - // 2 or more consecutive spaces - - if let Some(next_char) = input.chars().nth(cur_idx + 1) { - if next_char == ' ' { - continue; // keep collecting spaces + if let Some(ref mut space_span) = last_space_span { + // If this is consecutive with the last space, extend it + if space_span.end == ch.span.start { + space_span.end = ch.span.end; } - - if "+*{?".contains(next_char) && consecutive_spaces == 2 { - continue; // ignore 2 consecutive spaces before quantifier + // If it is not consecutive, and the last space is only one space, move it up + else if space_span.size() == 1 { + last_space_span.replace(ch.span); } - - return Some((start.unwrap(), cur_idx)); + } else { + last_space_span = Some(ch.span); } + }); - // end of string - return Some((start.unwrap(), cur_idx)); + // return None if last_space_span length is only 1 + if last_space_span.is_some_and(|span| span.size() > 1) { + last_space_span + } else { + None } - - None } fn is_regexp_new_expression(expr: &NewExpression<'_>) -> bool { @@ -167,23 +161,47 @@ impl NoRegexSpaces { expr.callee.is_specific_id("RegExp") && expr.arguments.len() > 0 } - /// Whether the input has a character class but no consecutive spaces - /// outside the character class. - fn has_exempted_char_class(input: &str) -> bool { - let mut inside_class = false; + // For skipping if there aren't any consecutive spaces in the source, to avoid reporting cases + // where the space is explicitly escaped, like: `RegExp(' \ ')``. + fn has_double_space(input: &str) -> bool { + DOUBLE_SPACE.is_match(input) + } +} - for (i, c) in input.chars().enumerate() { - match c { - '[' => inside_class = true, - ']' => inside_class = false, - ' ' if input.chars().nth(i + 1) == Some(' ') && !inside_class => { - return false; - } - _ => {} +/// Calls the given closure on every [`Term`] in the [`Pattern`]. +fn visit_terms<'a, F: FnMut(&'a Term<'a>)>(pattern: &'a Pattern, f: &mut F) { + visit_terms_disjunction(&pattern.body, f); +} + +/// Calls the given closure on every [`Term`] in the [`Disjunction`]. +fn visit_terms_disjunction<'a, F: FnMut(&'a Term<'a>)>(disjunction: &'a Disjunction, f: &mut F) { + for alternative in &disjunction.body { + visit_terms_alternative(alternative, f); + } +} + +/// Calls the given closure on every [`Term`] in the [`Alternative`]. +fn visit_terms_alternative<'a, F: FnMut(&'a Term<'a>)>(alternative: &'a Alternative, f: &mut F) { + for term in &alternative.body { + match term { + Term::LookAroundAssertion(lookaround) => { + f(term); + visit_terms_disjunction(&lookaround.body, f); + } + Term::Quantifier(quant) => { + f(term); + f(&quant.body); } + Term::CapturingGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + Term::IgnoreGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + _ => f(term), } - - true } } @@ -208,6 +226,7 @@ fn test() { "var foo = / */;", "var foo = / {2}/;", "var foo = / {2}/v;", + "var foo = / /;", r"var foo = /bar \\ baz/;", r"var foo = /bar\\ \\ baz/;", r"var foo = /bar \\u0020 baz/;", @@ -234,6 +253,7 @@ fn test() { ]; let fail = vec![ + "var foo = / /;", "var foo = /bar baz/;", "var foo = /bar baz/;", "var foo = / a b c d /;", diff --git a/crates/oxc_linter/src/snapshots/no_regex_spaces.snap b/crates/oxc_linter/src/snapshots/no_regex_spaces.snap index 0880c08b999ff..10b9110ff367d 100644 --- a/crates/oxc_linter/src/snapshots/no_regex_spaces.snap +++ b/crates/oxc_linter/src/snapshots/no_regex_spaces.snap @@ -1,6 +1,13 @@ --- source: crates/oxc_linter/src/tester.rs --- + ⚠ eslint(no-regex-spaces): Spaces are hard to count. + ╭─[no_regex_spaces.tsx:1:12] + 1 │ var foo = / /; + · ── + ╰──── + help: Use a quantifier, e.g. {2} + ⚠ eslint(no-regex-spaces): Spaces are hard to count. ╭─[no_regex_spaces.tsx:1:15] 1 │ var foo = /bar baz/; @@ -46,28 +53,28 @@ source: crates/oxc_linter/src/tester.rs ⚠ eslint(no-regex-spaces): Spaces are hard to count. ╭─[no_regex_spaces.tsx:1:15] 1 │ var foo = /bar {3}baz/; - · ─── + · ── ╰──── help: Use a quantifier, e.g. {2} ⚠ eslint(no-regex-spaces): Spaces are hard to count. ╭─[no_regex_spaces.tsx:1:15] 1 │ var foo = /bar ?baz/; - · ──── + · ─── ╰──── help: Use a quantifier, e.g. {2} ⚠ eslint(no-regex-spaces): Spaces are hard to count. ╭─[no_regex_spaces.tsx:1:26] 1 │ var foo = new RegExp('bar *baz') - · ─── + · ── ╰──── help: Use a quantifier, e.g. {2} ⚠ eslint(no-regex-spaces): Spaces are hard to count. ╭─[no_regex_spaces.tsx:1:22] 1 │ var foo = RegExp('bar +baz') - · ─── + · ── ╰──── help: Use a quantifier, e.g. {2} @@ -170,8 +177,8 @@ source: crates/oxc_linter/src/tester.rs help: Use a quantifier, e.g. {2} ⚠ eslint(no-regex-spaces): Spaces are hard to count. - ╭─[no_regex_spaces.tsx:1:35] + ╭─[no_regex_spaces.tsx:1:30] 1 │ var foo = new RegExp('[[ ] ] ', 'v'); - · ──── + · ──── ╰──── help: Use a quantifier, e.g. {2}