Skip to content
Merged
16 changes: 16 additions & 0 deletions .changeset/loose-cooks-report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
"@biomejs/biome": patch
---

Added the nursery rule [`useNamedCaptureGroup`](https://biomejs.dev/linter/rules/use-named-capture-group/).
The rule enforces using named capture groups in regular expressions instead of numbered ones. It supports both regex literals and `RegExp` constructor calls.

```js
// Invalid: unnamed capture group
/(foo)/;
new RegExp("(foo)");

// Valid: named capture group
/(?<id>foo)/;
new RegExp("(?<id>foo)");
```
12 changes: 12 additions & 0 deletions crates/biome_cli/src/execute/migrate/eslint_any_rule_to_biome.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions crates/biome_configuration/src/analyzer/linter/rules.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions crates/biome_diagnostics_categories/src/categories.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

276 changes: 276 additions & 0 deletions crates/biome_js_analyze/src/lint/nursery/use_named_capture_group.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
use biome_analyze::{Rule, RuleDiagnostic, RuleSource, context::RuleContext, declare_lint_rule};
use biome_console::markup;
use biome_js_semantic::SemanticModel;
use biome_js_syntax::{
AnyJsCallArgument, AnyJsExpression, JsCallArguments, JsNewOrCallExpression,
JsRegexLiteralExpression, global_identifier,
};
use biome_rowan::{AstNode, AstSeparatedList, TextRange, TextSize};
use biome_rule_options::use_named_capture_group::UseNamedCaptureGroupOptions;

use crate::services::semantic::Semantic;

declare_lint_rule! {
/// Enforce using named capture groups in regular expression.
///
/// Numbered capture groups like `(...)` can be difficult to work with,
/// as they are matched by their position and not by a descriptive name.
/// Named capture groups (`(?<name>...)`) associate a descriptive name
/// with each match, making the regular expression more readable and
/// its intent clearer.
///
/// ## Examples
///
/// ### Invalid
///
/// ```js,expect_diagnostic
/// /(ba[rz])/;
/// ```
///
/// ```js,expect_diagnostic
/// /([0-9]{4})/;
/// ```
///
/// ```js,expect_diagnostic
/// /(?:ab)(cd)/;
/// ```
///
/// ```js,expect_diagnostic
/// new RegExp("(foo)");
/// ```
///
/// ```js,expect_diagnostic
/// RegExp("(foo)");
/// ```
///
/// ### Valid
///
/// ```js
/// /(?<id>ba[rz])/;
/// /(?:ba[rz])/;
/// /ba[rz]/;
/// /(?<year>[0-9]{4})-(?<month>[0-9]{2})/;
/// new RegExp("(?<id>foo)");
/// new RegExp(pattern);
/// ```
///
pub UseNamedCaptureGroup {
version: "next",
name: "useNamedCaptureGroup",
language: "js",
sources: &[RuleSource::Eslint("prefer-named-capture-group").same()],
recommended: false,
}
}

impl Rule for UseNamedCaptureGroup {
type Query = Semantic<AnyJsExpression>;
type State = TextRange;
type Signals = Box<[Self::State]>;
type Options = UseNamedCaptureGroupOptions;

fn run(ctx: &RuleContext<Self>) -> Self::Signals {
let node = ctx.query();
match node {
AnyJsExpression::AnyJsLiteralExpression(
biome_js_syntax::AnyJsLiteralExpression::JsRegexLiteralExpression(regex),
) => run_regex_literal(regex),
AnyJsExpression::JsNewExpression(_) | AnyJsExpression::JsCallExpression(_) => {
run_regexp_constructor(node, ctx.model())
}
_ => Default::default(),
}
}

fn diagnostic(_: &RuleContext<Self>, state: &Self::State) -> Option<RuleDiagnostic> {
Some(
RuleDiagnostic::new(
rule_category!(),
state,
markup! {
"Capture group is not named."
},
)
.note(markup! {
"Named capture groups improve readability by associating a descriptive name with each match. Use "<Emphasis>"(?<name>...)"</Emphasis>" instead of "<Emphasis>"(...)"</Emphasis>"."
}),
)
}
}

/// Find byte offsets of unnamed capture groups in a regex pattern.
///
/// Returns a list of byte offsets (relative to pattern start) for each
/// unnamed capture group `(` found.
fn find_unnamed_capture_groups(pattern: &str) -> Vec<u32> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: move all helpers below the impl Rule block

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks i fixed it 0f54573

let mut result = Vec::new();
let mut bytes = pattern.as_bytes().iter().enumerate().peekable();
while let Some((i, &byte)) = bytes.next() {
match byte {
b'\\' => {
bytes.next();
}
b'[' => {
while let Some((_, &b)) = bytes.next() {
match b {
b'\\' => {
bytes.next();
}
b']' => break,
_ => {}
}
}
}
b'(' => {
if bytes.peek().is_some_and(|&(_, &b)| b == b'?') {
// `(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`, `(?<name>` — skip
} else {
result.push(i as u32);
}
}
_ => {}
}
}
result
}

fn is_regexp_object(expr: &AnyJsExpression, model: &SemanticModel) -> bool {
match global_identifier(&expr.clone().omit_parentheses()) {
Some((reference, name)) => match model.binding(&reference) {
Some(_) if !reference.is_global_this() && !reference.has_name("window") => false,
_ => name.text() == "RegExp",
},
None => false,
}
}

fn parse_regexp_node(
node: &JsNewOrCallExpression,
) -> Option<(AnyJsExpression, JsCallArguments)> {
match node {
JsNewOrCallExpression::JsNewExpression(node) => {
let callee = node.callee().ok()?;
let args = node.arguments()?;
Some((callee, args))
}
JsNewOrCallExpression::JsCallExpression(node) => {
let callee = node.callee().ok()?;
let args = node.arguments().ok()?;
Some((callee, args))
}
}
}

fn get_first_arg_expr(arguments: &JsCallArguments) -> Option<AnyJsExpression> {
let first_arg = arguments.args().iter().next()?;
let Ok(AnyJsCallArgument::AnyJsExpression(expr)) = first_arg else {
return None;
};
Some(expr)
}

/// Try to compute precise TextRange for each unnamed group in a string literal.
/// Returns `Some` if the argument is a simple string literal without escape sequences
/// (so byte offsets map 1:1 to source positions). Returns `None` otherwise.
fn try_precise_string_ranges(arg_expr: &AnyJsExpression) -> Option<Box<[TextRange]>> {
let AnyJsExpression::AnyJsLiteralExpression(
biome_js_syntax::AnyJsLiteralExpression::JsStringLiteralExpression(string_lit),
) = arg_expr
else {
return None;
};
let token = string_lit.value_token().ok()?;
let token_text = token.text_trimmed();
let raw_inner = &token_text[1..token_text.len() - 1];
let inner_text = string_lit.inner_string_text().ok()?;
// If raw source and interpreted text differ, escapes are present
if raw_inner != inner_text.text() {
return None;
}
Comment on lines +184 to +189
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand when this is the case. Are there tests for this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for missing the test cases earlier.

raw_inner != inner_text becomes true when the string contains escape sequences.
Without this guard find_unnamed_capture_groups would run on the raw source and produce incorrect offsets.

To clarify the behavior I added tests covering both directions

  • valid: new RegExp("\\(foo)") — using the raw source \\(foo) would incorrectly report ( as an unnamed group, while the actual regex \(foo) has no capture group.

  • invalid: new RegExp("\\d+(foo)") — since escapes are present, precise mapping is skipped but the fallback path correctly detects the unnamed group (foo) using the interpreted string \d+(foo).

fcce15a

let offsets = find_unnamed_capture_groups(raw_inner);
if offsets.is_empty() {
return Some(Default::default());
}
let content_start = token.text_trimmed_range().start() + TextSize::from(1);
Some(
offsets
.into_iter()
.map(|offset| {
let start = content_start + TextSize::from(offset);
TextRange::new(start, start + TextSize::from(1))
})
.collect(),
)
}

fn run_regex_literal(node: &JsRegexLiteralExpression) -> Box<[TextRange]> {
let Ok((pattern, _flags)) = node.decompose() else {
return Default::default();
};
let pattern_text = pattern.text();
let offsets = find_unnamed_capture_groups(pattern_text);
if offsets.is_empty() {
return Default::default();
}
let pattern_start = node.range().start() + TextSize::from(1);
offsets
.into_iter()
.map(|offset| {
let start = pattern_start + TextSize::from(offset);
TextRange::new(start, start + TextSize::from(1))
})
.collect()
}

fn run_regexp_constructor(node: &AnyJsExpression, model: &SemanticModel) -> Box<[TextRange]> {
let new_or_call = match node {
AnyJsExpression::JsNewExpression(n) => JsNewOrCallExpression::from(n.clone()),
AnyJsExpression::JsCallExpression(n) => JsNewOrCallExpression::from(n.clone()),
_ => return Default::default(),
};
let Some((callee, arguments)) = parse_regexp_node(&new_or_call) else {
return Default::default();
};
if !is_regexp_object(&callee, model) {
return Default::default();
}
let Some(arg_expr) = get_first_arg_expr(&arguments) else {
return Default::default();
};
// Try precise per-group diagnostics for simple string literals (no escapes)
if let Some(ranges) = try_precise_string_ranges(&arg_expr) {
return ranges;
}
// Fallback: use interpreted value, single diagnostic on the whole expression
let Some(static_val) = arg_expr.omit_parentheses().as_static_value() else {
return Default::default();
};
let Some(pattern) = static_val.as_string_constant() else {
return Default::default();
};
if find_unnamed_capture_groups(pattern).is_empty() {
return Default::default();
}
Box::new([node.range()])
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_unnamed_capture_groups() {
assert_eq!(find_unnamed_capture_groups("(foo)"), vec![0]);
assert_eq!(find_unnamed_capture_groups("(?:foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(?<name>foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(?=foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(?!foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(?<=foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(?<!foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("(foo)(bar)"), vec![0, 5]);
assert_eq!(find_unnamed_capture_groups("\\(foo)"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("\\\\(foo)"), vec![2]);
assert_eq!(find_unnamed_capture_groups("[(]foo"), Vec::<u32>::new());
assert_eq!(find_unnamed_capture_groups("[\\]]foo"), Vec::<u32>::new());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/(ba[rz])/;
/([0-9]{4})/;
/(foo)(bar)/;
/(?:ab)(cd)/;
/([a-z])\1/;
new RegExp("(foo)");
RegExp("(foo)");
new RegExp("(foo)(bar)");
new RegExp("\\d+(foo)");
Loading