diff --git a/crates/oxc_linter/src/ast_util.rs b/crates/oxc_linter/src/ast_util.rs index eec8ce8a12afd..a7831f3fc4bb5 100644 --- a/crates/oxc_linter/src/ast_util.rs +++ b/crates/oxc_linter/src/ast_util.rs @@ -10,7 +10,10 @@ use oxc_ast::{ use oxc_ecmascript::{ToBoolean, WithoutGlobalReferenceInformation}; use oxc_semantic::{AstNode, AstNodes, IsGlobalReference, NodeId, ReferenceId, Semantic, SymbolId}; use oxc_span::{GetSpan, Span}; -use oxc_syntax::operator::{AssignmentOperator, BinaryOperator, LogicalOperator, UnaryOperator}; +use oxc_syntax::{ + identifier::is_irregular_whitespace, + operator::{AssignmentOperator, BinaryOperator, LogicalOperator, UnaryOperator}, +}; use crate::{LintContext, utils::get_function_nearest_jsdoc_node}; @@ -975,3 +978,159 @@ pub fn is_node_within_call_argument<'a>( let arg_span = target_arg.span(); node_span.start >= arg_span.start && node_span.end <= arg_span.end } + +/// Determines if a semicolon is needed before inserting code that starts with +/// certain characters (`[`, `(`, `/`, `+`, `-`, `` ` ``) that could be misinterpreted +/// due to Automatic Semicolon Insertion (ASI) rules. +/// +/// Returns `true` if the node is at the start of an `ExpressionStatement` and the +/// character before it could cause the replacement to be parsed as a continuation +/// of the previous expression. +pub fn could_be_asi_hazard(node: &AstNode, ctx: &LintContext) -> bool { + let node_span = node.span(); + + // Find the enclosing ExpressionStatement, bailing early for nodes that can't + // be at statement start position + let mut expr_stmt_span = None; + for ancestor in ctx.nodes().ancestors(node.id()) { + match ancestor.kind() { + AstKind::ExpressionStatement(expr_stmt) => { + expr_stmt_span = Some(expr_stmt.span); + break; + } + // Expression types that can have our node at their start position + AstKind::CallExpression(_) + | AstKind::ComputedMemberExpression(_) + | AstKind::StaticMemberExpression(_) + | AstKind::PrivateFieldExpression(_) + | AstKind::ChainExpression(_) + | AstKind::TaggedTemplateExpression(_) + | AstKind::SequenceExpression(_) + | AstKind::AssignmentExpression(_) + | AstKind::LogicalExpression(_) + | AstKind::BinaryExpression(_) + | AstKind::ConditionalExpression(_) + | AstKind::AwaitExpression(_) + | AstKind::ParenthesizedExpression(_) + | AstKind::TSAsExpression(_) + | AstKind::TSSatisfiesExpression(_) + | AstKind::TSNonNullExpression(_) + | AstKind::TSTypeAssertion(_) + | AstKind::TSInstantiationExpression(_) => {} + _ => return false, + } + } + + let Some(expr_stmt_span) = expr_stmt_span else { + return false; + }; + + // Node must be at the start of the statement for ASI hazard to apply + if node_span.start != expr_stmt_span.start { + return false; + } + + if expr_stmt_span.start == 0 { + return false; + } + + let source_text = ctx.source_text(); + let last_char = find_last_meaningful_char(source_text, expr_stmt_span.start, ctx); + + let Some(last_char) = last_char else { + return false; + }; + + // Characters that could cause ASI issues when followed by `[`, `(`, `/`, etc. + matches!(last_char, ')' | ']' | '}' | '"' | '\'' | '`' | '+' | '-' | '/' | '.') + || last_char.is_alphanumeric() + || last_char == '_' + || last_char == '$' +} + +#[inline] +#[expect(clippy::cast_possible_wrap)] +fn is_utf8_char_boundary(b: u8) -> bool { + (b as i8) >= -0x40 +} + +/// Find the last meaningful (non-whitespace, non-comment) character before `end_pos`. +fn find_last_meaningful_char(source_text: &str, end_pos: u32, ctx: &LintContext) -> Option { + let bytes = source_text.as_bytes(); + let comments = ctx.semantic().comments(); + + let mut comment_idx = comments.partition_point(|c| c.span.start < end_pos); + let mut current_comment_end: u32 = 0; + let mut i = end_pos; + + // Handle case where end_pos is inside a comment + if comment_idx > 0 { + let prev_comment = &comments[comment_idx - 1]; + if end_pos <= prev_comment.span.end { + i = prev_comment.span.start; + comment_idx -= 1; + if comment_idx > 0 { + current_comment_end = comments[comment_idx - 1].span.end; + } + } + } + + while i > 0 { + if i <= current_comment_end && comment_idx > 0 { + comment_idx -= 1; + current_comment_end = comments[comment_idx].span.start; + i = current_comment_end; + continue; + } + + i -= 1; + + let byte = bytes[i as usize]; + + if byte.is_ascii_whitespace() { + continue; + } + + // Check if we're entering a comment from the end + if comment_idx > 0 { + let comment = &comments[comment_idx - 1]; + if i >= comment.span.start && i < comment.span.end { + i = comment.span.start; + comment_idx -= 1; + if comment_idx > 0 { + current_comment_end = comments[comment_idx - 1].span.end; + } + continue; + } + } + + if byte.is_ascii() { + return Some(byte as char); + } + + // Multi-byte UTF-8: find the start byte (max 4 bytes per char) + let i_usize = i as usize; + let char_start = if is_utf8_char_boundary(bytes[i_usize.saturating_sub(1)]) { + i_usize - 1 + } else if is_utf8_char_boundary(bytes[i_usize.saturating_sub(2)]) { + i_usize - 2 + } else { + i_usize - 3 + }; + + let c = source_text[char_start..].chars().next().unwrap(); + + // Skip irregular whitespace (NBSP, ZWNBSP, etc.) + if is_irregular_whitespace(c) { + #[expect(clippy::cast_possible_truncation)] + { + i = char_start as u32; + } + continue; + } + + return Some(c); + } + + None +} diff --git a/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs b/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs index b5b2e9149bab9..1cb9593419370 100644 --- a/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs +++ b/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs @@ -52,11 +52,15 @@ impl Rule for PreferSpread { return; }; - check_unicorn_prefer_spread(call_expr, ctx); + check_unicorn_prefer_spread(node, call_expr, ctx); } } -fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) { +fn check_unicorn_prefer_spread<'a>( + node: &AstNode<'a>, + call_expr: &CallExpression<'a>, + ctx: &LintContext<'a>, +) { let Some(member_expr) = call_expr.callee.without_parentheses().as_member_expression() else { return; }; @@ -87,7 +91,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) { return; } - report_with_spread_fixer(ctx, call_expr.span, "Array.from()", expr); + report_with_spread_fixer(node, ctx, call_expr.span, "Array.from()", expr); } // `array.concat()` "concat" => { @@ -131,7 +135,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) { } } - report_with_spread_fixer(ctx, call_expr.span, "array.slice()", member_expr_obj); + report_with_spread_fixer(node, ctx, call_expr.span, "array.slice()", member_expr_obj); } // `array.toSpliced()` "toSpliced" => { @@ -145,6 +149,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) { } report_with_spread_fixer( + node, ctx, call_expr.span, "array.toSpliced()", @@ -171,10 +176,15 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) { ctx.diagnostic_with_fix( unicorn_prefer_spread_diagnostic(call_expr.span, "string.split()"), |fixer| { + let needs_semi = ast_util::could_be_asi_hazard(node, ctx); let callee_obj = member_expr.object().without_parentheses(); + let prefix = if needs_semi { ";" } else { "" }; fixer.replace( call_expr.span, - format!("[...{}]", callee_obj.span().source_text(ctx.source_text())), + format!( + "{prefix}[...{}]", + callee_obj.span().source_text(ctx.source_text()) + ), ) }, ); @@ -241,13 +251,18 @@ fn is_not_array(expr: &Expression, ctx: &LintContext) -> bool { } fn report_with_spread_fixer( + node: &AstNode, ctx: &LintContext, span: Span, bad_method: &str, expr_to_spread: &Expression, ) { ctx.diagnostic_with_fix(unicorn_prefer_spread_diagnostic(span, bad_method), |fixer| { + let needs_semi = ast_util::could_be_asi_hazard(node, ctx); let mut codegen = fixer.codegen(); + if needs_semi { + codegen.print_str(";"); + } codegen.print_str("[..."); codegen.print_expression(expr_to_spread); codegen.print_str("]"); @@ -542,22 +557,118 @@ fn test() { // `Array.from()` ("const x = Array.from(set);", "const x = [...set];", None), ("Array.from(new Set([1, 2])).map(() => {});", "[...new Set([1, 2])].map(() => {});", None), + // `Array.from()` - ASI hazard cases (need semicolon prefix) + ( + "const foo = bar\nArray.from(set).map(() => {})", + "const foo = bar\n;[...set].map(() => {})", + None, + ), + ( + "foo()\nArray.from(set).forEach(doSomething)", + "foo()\n;[...set].forEach(doSomething)", + None, + ), + // `Array.from()` - No ASI hazard (semicolon already present) + ( + "const foo = bar;\nArray.from(set).map(() => {})", + "const foo = bar;\n[...set].map(() => {})", + None, + ), + // `Array.from()` - ASI hazard with comments before + ( + "foo() /* comment */\nArray.from(set).map(() => {})", + "foo() /* comment */\n;[...set].map(() => {})", + None, + ), + ( + "foo() // comment\nArray.from(set).map(() => {})", + "foo() // comment\n;[...set].map(() => {})", + None, + ), // `array.slice()` ("array.slice()", "[...array]", None), ("array.slice(1).slice()", "[...array.slice(1)]", None), + // `array.slice()` - ASI hazard cases + ("foo()\narray.slice()", "foo()\n;[...array]", None), // `array.toSpliced()` ("array.toSpliced()", "[...array]", None), ("const copy = array.toSpliced()", "const copy = [...array]", None), - // ("", "", None), - // ("", "", None), + // `array.toSpliced()` - ASI hazard cases + ("foo()\narray.toSpliced()", "foo()\n;[...array]", None), // `string.split()` (r#""🦄".split("")"#, r#"[..."🦄"]"#, None), (r#""foo bar baz".split("")"#, r#"[..."foo bar baz"]"#, None), + // `string.split()` - ASI hazard cases + ("foo()\nstr.split(\"\")", "foo()\n;[...str]", None), ( r"Array.from(path.matchAll(/\{([^{}?]+\??)\}/g))", "[...path.matchAll(/\\{([^{}?]+\\??)\\}/g)]", None, ), + // Cases where NO semicolon should be added (not an ExpressionStatement) + ("return Array.from(set)", "return [...set]", None), + ("const x = Array.from(set)", "const x = [...set]", None), + ("foo(Array.from(set))", "foo([...set])", None), + ("if (Array.from(set).length) {}", "if ([...set].length) {}", None), + // `Array.from()` - ASI hazard with multi-byte Unicode identifiers + ("日本語\nArray.from(set).map(() => {})", "日本語\n;[...set].map(() => {})", None), + ( + "const foo = 日本語\nArray.from(set).map(() => {})", + "const foo = 日本語\n;[...set].map(() => {})", + None, + ), + ("/**/Array.from(set).map(() => {})", "/**/[...set].map(() => {})", None), + ("/regex/\nArray.from(set).map(() => {})", "/regex/\n;[...set].map(() => {})", None), + ("/regex/g\nArray.from(set).map(() => {})", "/regex/g\n;[...set].map(() => {})", None), + ("0.\nArray.from(set).map(() => {})", "0.\n;[...set].map(() => {})", None), + ( + "foo()\u{00A0}\nArray.from(set).map(() => {})", + "foo()\u{00A0}\n;[...set].map(() => {})", + None, + ), + ( + "foo()\u{FEFF}\nArray.from(set).map(() => {})", + "foo()\u{FEFF}\n;[...set].map(() => {})", + None, + ), + ("foo() /* a */ /* b */\nArray.from(set)", "foo() /* a */ /* b */\n;[...set]", None), + ("x++\narray.slice()", "x++\n;[...array]", None), + ("x--\narray.slice()", "x--\n;[...array]", None), + ("arr[0]\narray.slice()", "arr[0]\n;[...array]", None), + ("obj.prop\narray.slice()", "obj.prop\n;[...array]", None), + ("while (array.slice().length) {}", "while ([...array].length) {}", None), + ("do {} while (array.slice().length)", "do {} while ([...array].length)", None), + ("for (array.slice();;) {}", "for ([...array];;) {}", None), + ("switch (array.slice()[0]) {}", "switch ([...array][0]) {}", None), + ("`template`\narray.toSpliced()", "`template`\n;[...array]", None), + ( + r#"'string' +str.split("")"#, + "'string'\n;[...str]", + None, + ), + ( + r#""string" +str.split("")"#, + r#""string" +;[...str]"#, + None, + ), + ( + "foo()\nArray.from(set).map(x => x).filter(Boolean).length", + "foo()\n;[...set].map(x => x).filter(Boolean).length", + None, + ), + ("const fn = () => Array.from(set)", "const fn = () => [...set]", None), + ("foo ? Array.from(a) : b", "foo ? [...a] : b", None), + ("foo || Array.from(set)", "foo || [...set]", None), + ("foo && Array.from(set)", "foo && [...set]", None), + ("foo + Array.from(set).length", "foo + [...set].length", None), + ("x = Array.from(set)", "x = [...set]", None), + ("const obj = { arr: Array.from(set) }", "const obj = { arr: [...set] }", None), + ("(foo, Array.from(set))", "(foo, [...set])", None), + ("[Array.from(set)]", "[[...set]]", None), + ("async () => await Array.from(set)", "async () => await [...set]", None), ]; Tester::new(PreferSpread::NAME, PreferSpread::PLUGIN, pass, fail)