feat(noUselessEscapeInRegex): add rule (#3604)

Co-authored-by: Arend van Beelen jr. <[email protected]>
biomejs · Aug 7, 2024 · f1ceeef · f1ceeef
1 parent bdc6e14
commit f1ceeef
Show file tree

Hide file tree

Showing 15 changed files with 1,658 additions and 62 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -172,7 +172,9 @@ our [guidelines for writing a good changelog entry](https://github.com/biomejs/b
 #### New features
 
 - Add support for GraphQL linting. Contributed by @ematipico
+
 - Add [nursery/noDynamicNamespaceImportAccess](https://biomejs.dev/linter/no-dynamic-namespace-import-access/). Contributed by @minht11
+
 - [noUndeclaredVariables](https://biomejs.dev/linter/rules/no-undeclared-variables/) no longer reports a direct reference to an enum member ([#2974](https://github.com/biomejs/biome/issues/2974)).
 
   In the following code, the `A` reference is no longer reported as an undeclared variable.
@@ -187,9 +189,14 @@ our [guidelines for writing a good changelog entry](https://github.com/biomejs/b
   Contributed by @Conaclos
 
 - Add [nursery/noIrregularWhitespace](https://biomejs.dev/linter/rules/no-irregular-whitespace). Contributed by @michellocana
+
 - Implement `noIrreguluarWhitespace` for CSS. Contributed by @DerTimonius
+
 - Add [nursery/useTrimStartEnd](https://biomejs.dev/linter/rules/use-trim-start-end/). Contributed by @chansuke
 
+- Add [nursery/noUselessEscapeInRegex](https://biomejs.dev/linter/rules/no-useless-escape-in-regex/).
+  Contributed by @Conaclos
+
 #### Enhancements
 
 - [noInvalidUseBeforeDeclaration](https://biomejs.dev/linter/rules/no-invalid-use-before-declaration) now reports direct use of an enum member before its declaration.

diff --git a/crates/biome_cli/src/execute/migrate/eslint_any_rule_to_biome.rs b/crates/biome_cli/src/execute/migrate/eslint_any_rule_to_biome.rs
diff --git a/crates/biome_configuration/src/analyzer/linter/rules.rs b/crates/biome_configuration/src/analyzer/linter/rules.rs
diff --git a/crates/biome_diagnostics_categories/src/categories.rs b/crates/biome_diagnostics_categories/src/categories.rs
@@ -137,8 +137,8 @@ define_categories! {
     "lint/nursery/noMissingGenericFamilyKeyword": "https://biomejs.dev/linter/rules/no-missing-generic-family-keyword",
     "lint/nursery/noReactSpecificProps": "https://biomejs.dev/linter/rules/no-react-specific-props",
     "lint/nursery/noRestrictedImports": "https://biomejs.dev/linter/rules/no-restricted-imports",
-    "lint/nursery/noStaticElementInteractions": "https://biomejs.dev/linter/rules/no-static-element-interactions",
     "lint/nursery/noShorthandPropertyOverrides": "https://biomejs.dev/linter/rules/no-shorthand-property-overrides",
+    "lint/nursery/noStaticElementInteractions": "https://biomejs.dev/linter/rules/no-static-element-interactions",
     "lint/nursery/noSubstr": "https://biomejs.dev/linter/rules/no-substr",
     "lint/nursery/noUndeclaredDependencies": "https://biomejs.dev/linter/rules/no-undeclared-dependencies",
     "lint/nursery/noUnknownFunction": "https://biomejs.dev/linter/rules/no-unknown-function",
@@ -149,6 +149,7 @@ define_categories! {
     "lint/nursery/noUnknownUnit": "https://biomejs.dev/linter/rules/no-unknown-unit",
     "lint/nursery/noUnmatchableAnbSelector": "https://biomejs.dev/linter/rules/no-unmatchable-anb-selector",
     "lint/nursery/noUnusedFunctionParameters": "https://biomejs.dev/linter/rules/no-unused-function-parameters",
+    "lint/nursery/noUselessEscapeInRegex": "https://biomejs.dev/linter/rules/no-useless-escape-in-regex",
     "lint/nursery/noUselessStringConcat": "https://biomejs.dev/linter/rules/no-useless-string-concat",
     "lint/nursery/noUselessUndefinedInitialization": "https://biomejs.dev/linter/rules/no-useless-undefined-initialization",
     "lint/nursery/noValueAtRule": "https://biomejs.dev/linter/rules/no-value-at-rule",

diff --git a/crates/biome_js_analyze/src/lint/nursery.rs b/crates/biome_js_analyze/src/lint/nursery.rs
@@ -17,6 +17,7 @@ pub mod no_static_element_interactions;
 pub mod no_substr;
 pub mod no_undeclared_dependencies;
 pub mod no_unused_function_parameters;
+pub mod no_useless_escape_in_regex;
 pub mod no_useless_string_concat;
 pub mod no_useless_undefined_initialization;
 pub mod no_yoda_expression;
@@ -59,6 +60,7 @@ declare_lint_group! {
             self :: no_substr :: NoSubstr ,
             self :: no_undeclared_dependencies :: NoUndeclaredDependencies ,
             self :: no_unused_function_parameters :: NoUnusedFunctionParameters ,
+            self :: no_useless_escape_in_regex :: NoUselessEscapeInRegex ,
             self :: no_useless_string_concat :: NoUselessStringConcat ,
             self :: no_useless_undefined_initialization :: NoUselessUndefinedInitialization ,
             self :: no_yoda_expression :: NoYodaExpression ,

diff --git a/crates/biome_js_analyze/src/lint/nursery/no_useless_escape_in_regex.rs b/crates/biome_js_analyze/src/lint/nursery/no_useless_escape_in_regex.rs
@@ -0,0 +1,293 @@
+use biome_analyze::{
+    context::RuleContext, declare_lint_rule, ActionCategory, Ast, FixKind, Rule, RuleDiagnostic,
+    RuleSource,
+};
+use biome_console::markup;
+use biome_js_syntax::{JsRegexLiteralExpression, JsSyntaxKind, JsSyntaxToken};
+use biome_rowan::{AstNode, BatchMutationExt, TextRange, TextSize};
+
+use crate::JsRuleAction;
+
+declare_lint_rule! {
+    /// Disallow unnecessary escape sequence in regular expression literals.
+    ///
+    /// Escaping non-special characters in regular expression literals doesn't have any effect.
+    /// Hence, they may confuse a reader.
+    ///
+    /// ## Examples
+    ///
+    /// ### Invalid
+    ///
+    /// ```js,expect_diagnostic
+    /// /\a/;
+    /// ```
+    ///
+    /// ```js,expect_diagnostic
+    /// /[\-]/;
+    /// ```
+    ///
+    /// ```js,expect_diagnostic
+    /// /[\&]/v;
+    /// ```
+    ///
+    /// ### Valid
+    ///
+    /// ```js
+    /// /\^\d\b/
+    /// ```
+    ///
+    /// ```js
+    /// /[\b]/
+    /// ```
+    pub NoUselessEscapeInRegex {
+        version: "next",
+        name: "noUselessEscapeInRegex",
+        language: "js",
+        sources: &[RuleSource::Eslint("no-useless-escape")],
+        recommended: true,
+        fix_kind: FixKind::Safe,
+    }
+}
+
+impl Rule for NoUselessEscapeInRegex {
+    type Query = Ast<JsRegexLiteralExpression>;
+    type State = State;
+    type Signals = Option<Self::State>;
+    type Options = ();
+
+    fn run(ctx: &RuleContext<Self>) -> Self::Signals {
+        let node = ctx.query();
+        let (pattern, flags) = node.decompose().ok()?;
+        let bytes = pattern.as_bytes();
+        let mut byte_it = bytes.iter().enumerate();
+        let has_v_flag = flags.text().as_bytes().contains(&b'v');
+        let has_u_flag = flags.text().as_bytes().contains(&b'u');
+        let is_unicode_aware = has_v_flag || has_u_flag;
+        while let Some((index, byte)) = byte_it.next() {
+            match byte {
+                b'\\' => {
+                    let Some((_, escaped)) = byte_it.next() else {
+                        break;
+                    };
+                    match escaped {
+                        b'\\'
+                        | b'/'
+                        // Anchrors
+                        | b'^' | b'$'
+                        // chartacaters sets
+                        | b'.' | b'd' | b'D' | b'w' | b'W' | b's' | b'S' |
+                        b't' | b'r' | b'n' | b'v' | b'f' | b'0' | b'c' | b'x' | b'u'
+                        // char claass
+                        | b'[' | b']'
+                        // Word boundary
+                        | b'b' | b'B'
+                        // quantrifiers
+                        | b'*' | b'+' | b'?' | b'{' | b'}'
+                        // Backreferences
+                        | b'1'..b'9'
+                        // Groups
+                        | b'(' | b')'
+                        // Alternation
+                        | b'|' => {}
+                        b'p' | b'P' | b'k' | b'q' if is_unicode_aware => {}
+                        _ => {
+                            return Some(State {
+                                backslash_index: index as u16,
+                                escaped: *escaped,
+                                in_char_class: false,
+                            });
+                        }
+                    }
+                }
+                b'[' => {
+                    let char_class_start_index = index;
+                    let mut inner_class_count = 0;
+                    while let Some((index, byte)) = byte_it.next() {
+                        match byte {
+                            b'\\' => {
+                                let Some((escaped_index, escaped)) = byte_it.next() else {
+                                    break;
+                                };
+                                match escaped {
+                                    // `^` can be escaped to avoid the negation of the char class.
+                                    b'^' if escaped_index == (char_class_start_index + 2) => {}
+                                    // No need to escape `-` at the start
+                                    b'-' if has_v_flag || escaped_index != (char_class_start_index + 2) => {}
+                                    b'\\'
+                                    | b']'
+                                    // chartacaters sets
+                                    | b'd' | b'D' | b'w' | b'W' | b's' | b'S' |
+                                    b't' | b'r' | b'n' | b'v' | b'f' | b'b' | b'0' |
+                                    b'c' | b'x' | b'u' => {}
+                                    b'p' | b'P' | b'k' | b'q' if is_unicode_aware => {}
+                                    // Invalid speccial characters in char class under the `v` flag.
+                                    b'(' | b')' | b'[' | b'{' | b'}' | b'/' | b'|' if has_v_flag => {}
+                                    // Perhaps a doubled punctuator
+                                    b'&' | b'!' | b'#' | b'$' | b'%' | b'*' | b'+' | b','
+                                    | b'.' | b':' | b';' | b'<' | b'=' | b'>' | b'?'
+                                    | b'@' | b'`' | b'~' if has_v_flag => {
+                                        if bytes[index-1] != *escaped && !byte_it.next().is_some_and(|(_, byte)| byte == escaped) {
+                                            return Some(State {
+                                                backslash_index: index as u16,
+                                                escaped: *escaped,
+                                                in_char_class: true,
+                                            });
+                                        }
+                                    }
+                                    b'_' if has_v_flag => {
+                                        // `[\_^^]`
+                                        if !byte_it.next().is_some_and(|(_, byte)| *byte == b'^') &&
+                                            !byte_it.next().is_some_and(|(_, byte)| *byte == b'^') {
+                                            return Some(State {
+                                                backslash_index: index as u16,
+                                                escaped: *escaped,
+                                                in_char_class: true,
+                                            });
+                                        }
+                                    }
+                                    b'^' if has_v_flag  => {
+                                        let must_be_escaped =
+                                            // `[_^\^]`
+                                            // `[^^\^]`
+                                            (matches!(bytes.get(index-2), Some(&b'_' | &b'^')) && bytes[index-1] == b'^') ||
+                                            (byte_it.next().is_some_and(|(_, byte)| *byte == b'^') && (
+                                                // `[_\^^]`
+                                                // `[^\^^]`
+                                                matches!(bytes[index-1], b'_' | b'^') ||
+                                                // `[\^^^]`
+                                                byte_it.next().is_some_and(|(_, byte)| *byte == b'^')
+                                            ));
+                                        if !must_be_escaped {
+                                            return Some(State {
+                                                backslash_index: index as u16,
+                                                escaped: *escaped,
+                                                in_char_class: true,
+                                            });
+                                        }
+                                    }
+                                    _ => {
+                                        return Some(State {
+                                            backslash_index: index as u16,
+                                            escaped: *escaped,
+                                            in_char_class: true,
+                                        });
+                                    }
+                                }
+                            }
+                            b'[' => {
+                                if has_v_flag {
+                                    inner_class_count += 1;
+                                }
+                            }
+                            b']' => {
+                                if has_v_flag && inner_class_count != 0 {
+                                    inner_class_count -= 1;
+                                } else if !has_v_flag && bytes[index - 1] == b'-' {
+                                    return Some(State {
+                                        backslash_index: (index - 2) as u16,
+                                        escaped: b'-',
+                                        in_char_class: false,
+                                    });
+                                } else {
+                                    break;
+                                }
+                            }
+                            _ => {}
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+        None
+    }
+
+    fn diagnostic(ctx: &RuleContext<Self>, state: &Self::State) -> Option<RuleDiagnostic> {
+        let State {
+            backslash_index,
+            escaped,
+            in_char_class,
+        } = state;
+        // Add 1 because the index was computed in the pattern (it doesn't take `/` into account).
+        let adjusted_backslash_index = (*backslash_index as u32) + 1;
+        let node = ctx.query();
+        let backslash_position = node.range().start() + TextSize::from(adjusted_backslash_index);
+        // To compute the correct text range, we need the byte length of the escaped character.
+        // To get that, we take a string slice from the escaped character and iterate until thenext character.
+        // The index of the next character corresponds to the byte length of the escaped character.
+        let (escaped_byte_len, _) = &node.value_token().ok()?.text_trimmed()
+            [(adjusted_backslash_index as usize + 1)..]
+            .char_indices()
+            .nth(1)?;
+        let diag = RuleDiagnostic::new(
+            rule_category!(),
+            TextRange::at(backslash_position, (1 + *escaped_byte_len as u32).into()),
+            markup! {
+                "The character doesn't need to be escaped."
+            },
+        );
+        Some(if matches!(escaped, b'p' | b'P' | b'k') {
+            diag.note("The escape sequence is only useful if the regular expression is unicode-aware. To be unicode-aware, the `u` or `v` flag should be used.")
+        } else if *in_char_class {
+            match escaped {
+                b'^' => {
+                    diag.note("The character should only be escaped if it is the first character of the class.")
+                }
+                b'B' => {
+                    diag.note("The escape sequence only has meaning outside a character class.")
+                }
+                b'(' | b')' | b'[' | b'{' | b'}' | b'/' | b'|' => {
+                    diag.note("The character should only be escaped if it is outside a character class or under the `v` flag.")
+                }
+                b'.' | b'$' | b'*' | b'+' | b'?' => {
+                    diag.note("The character should only be escaped if it is outside a character class.")
+                }
+                b'-' => {
+                    diag.note("The character should only be escaped if it appears in the middle of the character class or under the `v` flag.")
+                }
+                _ => diag,
+            }
+        } else {
+            diag
+        })
+    }
+
+    fn action(ctx: &RuleContext<Self>, state: &Self::State) -> Option<JsRuleAction> {
+        let State {
+            backslash_index, ..
+        } = state;
+        // Add 1 because the index was computed in the pattern (it doesn't take `/` into account).
+        let adjusted_backslash_index = (*backslash_index as usize) + 1;
+        let node = ctx.query();
+        let value_token = node.value_token().ok()?;
+        let regex_text = value_token.text_trimmed();
+        debug_assert!(
+            regex_text.as_bytes().get(adjusted_backslash_index) == Some(&b'\\'),
+            "backslash_index should points to a backslash."
+        );
+        let new_regex = JsSyntaxToken::new_detached(
+            JsSyntaxKind::JS_REGEX_LITERAL,
+            &format!(
+                "{}{}",
+                &regex_text[..adjusted_backslash_index],
+                &regex_text[(adjusted_backslash_index + 1)..]
+            ),
+            [],
+            [],
+        );
+        let mut mutation = ctx.root().begin();
+        mutation.replace_token(value_token, new_regex);
+        Some(JsRuleAction::new(
+            ActionCategory::QuickFix,
+            ctx.metadata().applicability(),
+            markup! { "Unescape the character." }.to_owned(),
+            mutation,
+        ))
+    }
+}
+
+pub struct State {
+    backslash_index: u16,
+    escaped: u8,
+    in_char_class: bool,
+}
diff --git a/crates/biome_js_analyze/src/options.rs b/crates/biome_js_analyze/src/options.rs