Skip to content

Commit

Permalink
feat(noUselessEscapeInRegex): add rule (#3604)
Browse files Browse the repository at this point in the history
Co-authored-by: Arend van Beelen jr. <[email protected]>
  • Loading branch information
Conaclos and arendjr authored Aug 7, 2024
1 parent bdc6e14 commit f1ceeef
Show file tree
Hide file tree
Showing 15 changed files with 1,658 additions and 62 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ our [guidelines for writing a good changelog entry](https://github.com/biomejs/b
#### New features

- Add support for GraphQL linting. Contributed by @ematipico

- Add [nursery/noDynamicNamespaceImportAccess](https://biomejs.dev/linter/no-dynamic-namespace-import-access/). Contributed by @minht11

- [noUndeclaredVariables](https://biomejs.dev/linter/rules/no-undeclared-variables/) no longer reports a direct reference to an enum member ([#2974](https://github.com/biomejs/biome/issues/2974)).

In the following code, the `A` reference is no longer reported as an undeclared variable.
Expand All @@ -187,9 +189,14 @@ our [guidelines for writing a good changelog entry](https://github.com/biomejs/b
Contributed by @Conaclos

- Add [nursery/noIrregularWhitespace](https://biomejs.dev/linter/rules/no-irregular-whitespace). Contributed by @michellocana

- Implement `noIrreguluarWhitespace` for CSS. Contributed by @DerTimonius

- Add [nursery/useTrimStartEnd](https://biomejs.dev/linter/rules/use-trim-start-end/). Contributed by @chansuke

- Add [nursery/noUselessEscapeInRegex](https://biomejs.dev/linter/rules/no-useless-escape-in-regex/).
Contributed by @Conaclos

#### Enhancements

- [noInvalidUseBeforeDeclaration](https://biomejs.dev/linter/rules/no-invalid-use-before-declaration) now reports direct use of an enum member before its declaration.
Expand Down
10 changes: 10 additions & 0 deletions crates/biome_cli/src/execute/migrate/eslint_any_rule_to_biome.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

134 changes: 78 additions & 56 deletions crates/biome_configuration/src/analyzer/linter/rules.rs

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion crates/biome_diagnostics_categories/src/categories.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ define_categories! {
"lint/nursery/noMissingGenericFamilyKeyword": "https://biomejs.dev/linter/rules/no-missing-generic-family-keyword",
"lint/nursery/noReactSpecificProps": "https://biomejs.dev/linter/rules/no-react-specific-props",
"lint/nursery/noRestrictedImports": "https://biomejs.dev/linter/rules/no-restricted-imports",
"lint/nursery/noStaticElementInteractions": "https://biomejs.dev/linter/rules/no-static-element-interactions",
"lint/nursery/noShorthandPropertyOverrides": "https://biomejs.dev/linter/rules/no-shorthand-property-overrides",
"lint/nursery/noStaticElementInteractions": "https://biomejs.dev/linter/rules/no-static-element-interactions",
"lint/nursery/noSubstr": "https://biomejs.dev/linter/rules/no-substr",
"lint/nursery/noUndeclaredDependencies": "https://biomejs.dev/linter/rules/no-undeclared-dependencies",
"lint/nursery/noUnknownFunction": "https://biomejs.dev/linter/rules/no-unknown-function",
Expand All @@ -149,6 +149,7 @@ define_categories! {
"lint/nursery/noUnknownUnit": "https://biomejs.dev/linter/rules/no-unknown-unit",
"lint/nursery/noUnmatchableAnbSelector": "https://biomejs.dev/linter/rules/no-unmatchable-anb-selector",
"lint/nursery/noUnusedFunctionParameters": "https://biomejs.dev/linter/rules/no-unused-function-parameters",
"lint/nursery/noUselessEscapeInRegex": "https://biomejs.dev/linter/rules/no-useless-escape-in-regex",
"lint/nursery/noUselessStringConcat": "https://biomejs.dev/linter/rules/no-useless-string-concat",
"lint/nursery/noUselessUndefinedInitialization": "https://biomejs.dev/linter/rules/no-useless-undefined-initialization",
"lint/nursery/noValueAtRule": "https://biomejs.dev/linter/rules/no-value-at-rule",
Expand Down
2 changes: 2 additions & 0 deletions crates/biome_js_analyze/src/lint/nursery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub mod no_static_element_interactions;
pub mod no_substr;
pub mod no_undeclared_dependencies;
pub mod no_unused_function_parameters;
pub mod no_useless_escape_in_regex;
pub mod no_useless_string_concat;
pub mod no_useless_undefined_initialization;
pub mod no_yoda_expression;
Expand Down Expand Up @@ -59,6 +60,7 @@ declare_lint_group! {
self :: no_substr :: NoSubstr ,
self :: no_undeclared_dependencies :: NoUndeclaredDependencies ,
self :: no_unused_function_parameters :: NoUnusedFunctionParameters ,
self :: no_useless_escape_in_regex :: NoUselessEscapeInRegex ,
self :: no_useless_string_concat :: NoUselessStringConcat ,
self :: no_useless_undefined_initialization :: NoUselessUndefinedInitialization ,
self :: no_yoda_expression :: NoYodaExpression ,
Expand Down
293 changes: 293 additions & 0 deletions crates/biome_js_analyze/src/lint/nursery/no_useless_escape_in_regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
use biome_analyze::{
context::RuleContext, declare_lint_rule, ActionCategory, Ast, FixKind, Rule, RuleDiagnostic,
RuleSource,
};
use biome_console::markup;
use biome_js_syntax::{JsRegexLiteralExpression, JsSyntaxKind, JsSyntaxToken};
use biome_rowan::{AstNode, BatchMutationExt, TextRange, TextSize};

use crate::JsRuleAction;

declare_lint_rule! {
/// Disallow unnecessary escape sequence in regular expression literals.
///
/// Escaping non-special characters in regular expression literals doesn't have any effect.
/// Hence, they may confuse a reader.
///
/// ## Examples
///
/// ### Invalid
///
/// ```js,expect_diagnostic
/// /\a/;
/// ```
///
/// ```js,expect_diagnostic
/// /[\-]/;
/// ```
///
/// ```js,expect_diagnostic
/// /[\&]/v;
/// ```
///
/// ### Valid
///
/// ```js
/// /\^\d\b/
/// ```
///
/// ```js
/// /[\b]/
/// ```
pub NoUselessEscapeInRegex {
version: "next",
name: "noUselessEscapeInRegex",
language: "js",
sources: &[RuleSource::Eslint("no-useless-escape")],
recommended: true,
fix_kind: FixKind::Safe,
}
}

impl Rule for NoUselessEscapeInRegex {
type Query = Ast<JsRegexLiteralExpression>;
type State = State;
type Signals = Option<Self::State>;
type Options = ();

fn run(ctx: &RuleContext<Self>) -> Self::Signals {
let node = ctx.query();
let (pattern, flags) = node.decompose().ok()?;
let bytes = pattern.as_bytes();
let mut byte_it = bytes.iter().enumerate();
let has_v_flag = flags.text().as_bytes().contains(&b'v');
let has_u_flag = flags.text().as_bytes().contains(&b'u');
let is_unicode_aware = has_v_flag || has_u_flag;
while let Some((index, byte)) = byte_it.next() {
match byte {
b'\\' => {
let Some((_, escaped)) = byte_it.next() else {
break;
};
match escaped {
b'\\'
| b'/'
// Anchrors
| b'^' | b'$'
// chartacaters sets
| b'.' | b'd' | b'D' | b'w' | b'W' | b's' | b'S' |
b't' | b'r' | b'n' | b'v' | b'f' | b'0' | b'c' | b'x' | b'u'
// char claass
| b'[' | b']'
// Word boundary
| b'b' | b'B'
// quantrifiers
| b'*' | b'+' | b'?' | b'{' | b'}'
// Backreferences
| b'1'..b'9'
// Groups
| b'(' | b')'
// Alternation
| b'|' => {}
b'p' | b'P' | b'k' | b'q' if is_unicode_aware => {}
_ => {
return Some(State {
backslash_index: index as u16,
escaped: *escaped,
in_char_class: false,
});
}
}
}
b'[' => {
let char_class_start_index = index;
let mut inner_class_count = 0;
while let Some((index, byte)) = byte_it.next() {
match byte {
b'\\' => {
let Some((escaped_index, escaped)) = byte_it.next() else {
break;
};
match escaped {
// `^` can be escaped to avoid the negation of the char class.
b'^' if escaped_index == (char_class_start_index + 2) => {}
// No need to escape `-` at the start
b'-' if has_v_flag || escaped_index != (char_class_start_index + 2) => {}
b'\\'
| b']'
// chartacaters sets
| b'd' | b'D' | b'w' | b'W' | b's' | b'S' |
b't' | b'r' | b'n' | b'v' | b'f' | b'b' | b'0' |
b'c' | b'x' | b'u' => {}
b'p' | b'P' | b'k' | b'q' if is_unicode_aware => {}
// Invalid speccial characters in char class under the `v` flag.
b'(' | b')' | b'[' | b'{' | b'}' | b'/' | b'|' if has_v_flag => {}
// Perhaps a doubled punctuator
b'&' | b'!' | b'#' | b'$' | b'%' | b'*' | b'+' | b','
| b'.' | b':' | b';' | b'<' | b'=' | b'>' | b'?'
| b'@' | b'`' | b'~' if has_v_flag => {
if bytes[index-1] != *escaped && !byte_it.next().is_some_and(|(_, byte)| byte == escaped) {
return Some(State {
backslash_index: index as u16,
escaped: *escaped,
in_char_class: true,
});
}
}
b'_' if has_v_flag => {
// `[\_^^]`
if !byte_it.next().is_some_and(|(_, byte)| *byte == b'^') &&
!byte_it.next().is_some_and(|(_, byte)| *byte == b'^') {
return Some(State {
backslash_index: index as u16,
escaped: *escaped,
in_char_class: true,
});
}
}
b'^' if has_v_flag => {
let must_be_escaped =
// `[_^\^]`
// `[^^\^]`
(matches!(bytes.get(index-2), Some(&b'_' | &b'^')) && bytes[index-1] == b'^') ||
(byte_it.next().is_some_and(|(_, byte)| *byte == b'^') && (
// `[_\^^]`
// `[^\^^]`
matches!(bytes[index-1], b'_' | b'^') ||
// `[\^^^]`
byte_it.next().is_some_and(|(_, byte)| *byte == b'^')
));
if !must_be_escaped {
return Some(State {
backslash_index: index as u16,
escaped: *escaped,
in_char_class: true,
});
}
}
_ => {
return Some(State {
backslash_index: index as u16,
escaped: *escaped,
in_char_class: true,
});
}
}
}
b'[' => {
if has_v_flag {
inner_class_count += 1;
}
}
b']' => {
if has_v_flag && inner_class_count != 0 {
inner_class_count -= 1;
} else if !has_v_flag && bytes[index - 1] == b'-' {
return Some(State {
backslash_index: (index - 2) as u16,
escaped: b'-',
in_char_class: false,
});
} else {
break;
}
}
_ => {}
}
}
}
_ => {}
}
}
None
}

fn diagnostic(ctx: &RuleContext<Self>, state: &Self::State) -> Option<RuleDiagnostic> {
let State {
backslash_index,
escaped,
in_char_class,
} = state;
// Add 1 because the index was computed in the pattern (it doesn't take `/` into account).
let adjusted_backslash_index = (*backslash_index as u32) + 1;
let node = ctx.query();
let backslash_position = node.range().start() + TextSize::from(adjusted_backslash_index);
// To compute the correct text range, we need the byte length of the escaped character.
// To get that, we take a string slice from the escaped character and iterate until thenext character.
// The index of the next character corresponds to the byte length of the escaped character.
let (escaped_byte_len, _) = &node.value_token().ok()?.text_trimmed()
[(adjusted_backslash_index as usize + 1)..]
.char_indices()
.nth(1)?;
let diag = RuleDiagnostic::new(
rule_category!(),
TextRange::at(backslash_position, (1 + *escaped_byte_len as u32).into()),
markup! {
"The character doesn't need to be escaped."
},
);
Some(if matches!(escaped, b'p' | b'P' | b'k') {
diag.note("The escape sequence is only useful if the regular expression is unicode-aware. To be unicode-aware, the `u` or `v` flag should be used.")
} else if *in_char_class {
match escaped {
b'^' => {
diag.note("The character should only be escaped if it is the first character of the class.")
}
b'B' => {
diag.note("The escape sequence only has meaning outside a character class.")
}
b'(' | b')' | b'[' | b'{' | b'}' | b'/' | b'|' => {
diag.note("The character should only be escaped if it is outside a character class or under the `v` flag.")
}
b'.' | b'$' | b'*' | b'+' | b'?' => {
diag.note("The character should only be escaped if it is outside a character class.")
}
b'-' => {
diag.note("The character should only be escaped if it appears in the middle of the character class or under the `v` flag.")
}
_ => diag,
}
} else {
diag
})
}

fn action(ctx: &RuleContext<Self>, state: &Self::State) -> Option<JsRuleAction> {
let State {
backslash_index, ..
} = state;
// Add 1 because the index was computed in the pattern (it doesn't take `/` into account).
let adjusted_backslash_index = (*backslash_index as usize) + 1;
let node = ctx.query();
let value_token = node.value_token().ok()?;
let regex_text = value_token.text_trimmed();
debug_assert!(
regex_text.as_bytes().get(adjusted_backslash_index) == Some(&b'\\'),
"backslash_index should points to a backslash."
);
let new_regex = JsSyntaxToken::new_detached(
JsSyntaxKind::JS_REGEX_LITERAL,
&format!(
"{}{}",
&regex_text[..adjusted_backslash_index],
&regex_text[(adjusted_backslash_index + 1)..]
),
[],
[],
);
let mut mutation = ctx.root().begin();
mutation.replace_token(value_token, new_regex);
Some(JsRuleAction::new(
ActionCategory::QuickFix,
ctx.metadata().applicability(),
markup! { "Unescape the character." }.to_owned(),
mutation,
))
}
}

pub struct State {
backslash_index: u16,
escaped: u8,
in_char_class: bool,
}
1 change: 1 addition & 0 deletions crates/biome_js_analyze/src/options.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f1ceeef

Please sign in to comment.