diff --git a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs index cf683780a40d8..fb6e66e60bab2 100644 --- a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs +++ b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs @@ -1,25 +1,88 @@ +use std::fmt::Write; + use itertools::Itertools as _; + use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; use oxc_regular_expression::{ - ast::{CapturingGroup, Character, Pattern}, + ast::{CapturingGroup, Character, CharacterKind, Pattern}, visit::{Visit, walk}, }; use oxc_span::Span; use crate::{AstNode, context::LintContext, rule::Rule, utils::run_on_regex_node}; -fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic { +fn no_control_regex_diagnostic(control_chars: &[Character], span: Span) -> OxcDiagnostic { + let count = control_chars.len(); debug_assert!(count > 0); - let (message, help) = if count == 1 { - ("Unexpected control character", format!("'{regex}' is not a valid control character.")) - } else { - ("Unexpected control characters", format!("'{regex}' are not valid control characters.")) - }; - OxcDiagnostic::warn(message).with_help(help).with_label(span) -} + let mut octal_chars = Vec::new(); + let mut null_chars = Vec::new(); + let mut other_chars = Vec::new(); + for ch in control_chars { + match ch.kind { + CharacterKind::Octal1 | CharacterKind::Octal2 | CharacterKind::Octal3 => { + octal_chars.push(ch); + } + CharacterKind::Null => { + null_chars.push(ch); + } + _ => { + other_chars.push(ch); + } + } + } + + let mut help = String::new(); + + if !other_chars.is_empty() { + let regexes = other_chars.iter().join(", "); + writeln!( + help, + "'{regexes}' {} not {}valid control character{}.", + if other_chars.len() > 1 { "are" } else { "is" }, + if other_chars.len() > 1 { "" } else { "a " }, + if other_chars.len() > 1 { "s" } else { "" }, + ) + .unwrap(); + } + + if !octal_chars.is_empty() { + let regexes = octal_chars.iter().join(", "); + writeln!( + help, + "'{regexes}' {} not {}valid control character{}. They look like backreferences, but there {} no corresponding capture group{}. If you are trying to match the octal character, consider using '\\xNN' or '\\u00NN' instead.", + if octal_chars.len() > 1 { "are" } else { "is" }, + if octal_chars.len() > 1 { "" } else { "a " }, + if octal_chars.len() > 1 { "s" } else { "" }, + if octal_chars.len() > 1 { "are" } else { "is" }, + if octal_chars.len() > 1 { "s" } else { "" } + ).unwrap(); + } + + if !null_chars.is_empty() { + writeln!( + help, + "'\\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\\x00' or '\\u0000' for clarity." + ).unwrap(); + } + + debug_assert!(!help.is_empty()); + debug_assert!(help.chars().last().is_some_and(|char| char == '\n')); + + if !help.is_empty() { + help.truncate(help.len() - 1); + } + + OxcDiagnostic::warn(if count > 1 { + "Unexpected control characters" + } else { + "Unexpected control character" + }) + .with_help(help) + .with_label(span) +} #[derive(Debug, Default, Clone)] pub struct NoControlRegex; @@ -84,9 +147,7 @@ fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { finder.visit_pattern(pattern); if !finder.control_chars.is_empty() { - let num_control_chars = finder.control_chars.len(); - let violations = finder.control_chars.into_iter().map(|c| c.to_string()).join(", "); - context.diagnostic(no_control_regex_diagnostic(num_control_chars, &violations, span)); + context.diagnostic(no_control_regex_diagnostic(&finder.control_chars, span)); } } @@ -152,7 +213,7 @@ mod tests { use super::*; use crate::tester::Tester; - #[test] + #[test] // fn test_hex_literals() { Tester::new( NoControlRegex::NAME, @@ -298,6 +359,10 @@ mod tests { r"/\x0d/u", r"/\u{09}/u", r"/\x09/u", + r"/\0\1\2/", + r"/\x1f\2/", + r"/\x1f\0/", + r"/\x1f\0\2/", ], ) .test_and_snapshot(); diff --git a/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap b/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap index aaa05502e0d72..80654e31f495c 100644 Binary files a/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap and b/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap differ diff --git a/crates/oxc_linter/src/snapshots/eslint_no_control_regex@capture-group-indexing.snap b/crates/oxc_linter/src/snapshots/eslint_no_control_regex@capture-group-indexing.snap index e571967fdda45..be4bc264691a0 100644 --- a/crates/oxc_linter/src/snapshots/eslint_no_control_regex@capture-group-indexing.snap +++ b/crates/oxc_linter/src/snapshots/eslint_no_control_regex@capture-group-indexing.snap @@ -6,25 +6,25 @@ source: crates/oxc_linter/src/tester.rs 1 │ const r = /\0/; · ──── ╰──── - help: '\0' is not a valid control character. + help: '\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\x00' or '\u0000' for clarity. ⚠ eslint(no-control-regex): Unexpected control character ╭─[no_control_regex.tsx:1:11] 1 │ const r = /[a-z]\1/; · ───────── ╰──── - help: '\1' is not a valid control character. + help: '\1' is not a valid control character. They look like backreferences, but there is no corresponding capture group. If you are trying to match the octal character, consider using '\xNN' or '\u00NN' instead. ⚠ eslint(no-control-regex): Unexpected control character ╭─[no_control_regex.tsx:1:11] 1 │ const r = /([a-z])\2/; · ─────────── ╰──── - help: '\2' is not a valid control character. + help: '\2' is not a valid control character. They look like backreferences, but there is no corresponding capture group. If you are trying to match the octal character, consider using '\xNN' or '\u00NN' instead. ⚠ eslint(no-control-regex): Unexpected control character ╭─[no_control_regex.tsx:1:11] 1 │ const r = /([a-z])\0/; · ─────────── ╰──── - help: '\0' is not a valid control character. + help: '\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\x00' or '\u0000' for clarity.