From 10e77d7d9a72680b6b88356d2ff20e680f391c10 Mon Sep 17 00:00:00 2001 From: camc314 <18101008+camc314@users.noreply.github.com> Date: Mon, 12 May 2025 11:42:31 +0000 Subject: [PATCH] fix(linter): improve diagnostics for no-control-regex (#10959) closes #10950, by improving the diagnostic when (deprecated) octal escapes are used --- .../src/rules/eslint/no_control_regex.rs | 91 +++++++++++++++--- .../snapshots/eslint_no_control_regex.snap | Bin 5225 -> 7198 bytes ..._control_regex@capture-group-indexing.snap | 8 +- 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs index cf683780a40d8..fb6e66e60bab2 100644 --- a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs +++ b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs @@ -1,25 +1,88 @@ +use std::fmt::Write; + use itertools::Itertools as _; + use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; use oxc_regular_expression::{ - ast::{CapturingGroup, Character, Pattern}, + ast::{CapturingGroup, Character, CharacterKind, Pattern}, visit::{Visit, walk}, }; use oxc_span::Span; use crate::{AstNode, context::LintContext, rule::Rule, utils::run_on_regex_node}; -fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic { +fn no_control_regex_diagnostic(control_chars: &[Character], span: Span) -> OxcDiagnostic { + let count = control_chars.len(); debug_assert!(count > 0); - let (message, help) = if count == 1 { - ("Unexpected control character", format!("'{regex}' is not a valid control character.")) - } else { - ("Unexpected control characters", format!("'{regex}' are not valid control characters.")) - }; - OxcDiagnostic::warn(message).with_help(help).with_label(span) -} + let mut octal_chars = Vec::new(); + let mut null_chars = Vec::new(); + let mut other_chars = Vec::new(); + for ch in control_chars { + match ch.kind { + CharacterKind::Octal1 | CharacterKind::Octal2 | CharacterKind::Octal3 => { + octal_chars.push(ch); + } + CharacterKind::Null => { + null_chars.push(ch); + } + _ => { + other_chars.push(ch); + } + } + } + + let mut help = String::new(); + + if !other_chars.is_empty() { + let regexes = other_chars.iter().join(", "); + writeln!( + help, + "'{regexes}' {} not {}valid control character{}.", + if other_chars.len() > 1 { "are" } else { "is" }, + if other_chars.len() > 1 { "" } else { "a " }, + if other_chars.len() > 1 { "s" } else { "" }, + ) + .unwrap(); + } + + if !octal_chars.is_empty() { + let regexes = octal_chars.iter().join(", "); + writeln!( + help, + "'{regexes}' {} not {}valid control character{}. They look like backreferences, but there {} no corresponding capture group{}. If you are trying to match the octal character, consider using '\\xNN' or '\\u00NN' instead.", + if octal_chars.len() > 1 { "are" } else { "is" }, + if octal_chars.len() > 1 { "" } else { "a " }, + if octal_chars.len() > 1 { "s" } else { "" }, + if octal_chars.len() > 1 { "are" } else { "is" }, + if octal_chars.len() > 1 { "s" } else { "" } + ).unwrap(); + } + + if !null_chars.is_empty() { + writeln!( + help, + "'\\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\\x00' or '\\u0000' for clarity." + ).unwrap(); + } + + debug_assert!(!help.is_empty()); + debug_assert!(help.chars().last().is_some_and(|char| char == '\n')); + + if !help.is_empty() { + help.truncate(help.len() - 1); + } + + OxcDiagnostic::warn(if count > 1 { + "Unexpected control characters" + } else { + "Unexpected control character" + }) + .with_help(help) + .with_label(span) +} #[derive(Debug, Default, Clone)] pub struct NoControlRegex; @@ -84,9 +147,7 @@ fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { finder.visit_pattern(pattern); if !finder.control_chars.is_empty() { - let num_control_chars = finder.control_chars.len(); - let violations = finder.control_chars.into_iter().map(|c| c.to_string()).join(", "); - context.diagnostic(no_control_regex_diagnostic(num_control_chars, &violations, span)); + context.diagnostic(no_control_regex_diagnostic(&finder.control_chars, span)); } } @@ -152,7 +213,7 @@ mod tests { use super::*; use crate::tester::Tester; - #[test] + #[test] // fn test_hex_literals() { Tester::new( NoControlRegex::NAME, @@ -298,6 +359,10 @@ mod tests { r"/\x0d/u", r"/\u{09}/u", r"/\x09/u", + r"/\0\1\2/", + r"/\x1f\2/", + r"/\x1f\0/", + r"/\x1f\0\2/", ], ) .test_and_snapshot(); diff --git a/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap b/crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap index aaa05502e0d72955fca74edf2f389614f34b1e3c..80654e31f495c59bb2d7cddc7d179580458ee656 100644 GIT binary patch literal 7198 zcmeHM&2G~`5ayhRpmTEqNos)h@LR2w(Ct|)p(=bHFZ=)s(6NS;lv9d zapTUTkg%w$w^?JPDH}lP|$F*9mBzY((YXKERF=_IMp7~y&m@uWF zCU#-7E@Y{OV*y4#egczvKsn$w$^#|1UlXj$qDl)+0~YleRm_FkOF(->5bP0nGmO6f z8r@uf3i#RW<+J$Cx{^`LZrLwBm%xV6%{TZ;1jLu%6_`$BcT9cz-#>Vo9B69hH-5(N zbT4-GnBQ-Kfo9qUcoG7xfZ!v1Z^1O{CA{N7P&yi?KCygY(Bt(vKDnmOU9^Zb2ABIk}5E% z%;t)8PbM1&Rd{Lu5iCv|_oNNEVcyst8#-K2KARDekh6@LCwsz7%*t3x7%^KSCpSw6 zJ7+0}n5>0jN((8AmIQ0k$PSGHue9aYFUgTk9F=v~m`vzsxybtrMDK)GwtLv5!=rjd z^RO-rNhr)W_JW~CqKK&_Mbm@F@R{C>wT&tm;UZVV>^{8EXQ^lcU#)~ z_AJ@i!mfhT$nnkwAA4*7KIa$UdlwAO3B3@k!vqT`lT|nm6{sHmy)7SLRa6Kj`#f;H zpbM1rRfre6f`@%shxZ*A@Gu^$#6XWw9KH}md)f+cs)#=+s8)52ReCNH5K285iy_Cy z25^Bb+;Zu*7hw68xOELTJ`Be)#wMAJjRv8gvI*tW=N2B7DqQwFd@hzt$$Tw0nW`=n z2k!JVr!6j;B~~YrS?YENr%Zh!JT=&T86_Pmx_GozB$`l(qI%QWRcK)+_t!zgF%($iyFE9Ba8+^1Ljq?1#ssnMo!ndUX8* D