From e3e37abbdb0fad786b7535657b975dd885edd612 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Tue, 22 Oct 2024 18:46:26 +0200 Subject: [PATCH] feat(organize_import): move legacy code to a deidicated module an dintroduce restricted globs (#4357) --- Cargo.lock | 1 + Cargo.toml | 1 + crates/biome_js_analyze/Cargo.toml | 1 + .../src/assists/source/organize_imports.rs | 717 ++---------------- .../assists/source/organize_imports/legacy.rs | 674 ++++++++++++++++ crates/biome_js_analyze/src/utils.rs | 1 + .../src/utils/restricted_glob.rs | 261 +++++++ .../src/utils/restricted_regex.rs | 294 ++++--- .../invalidCustomRegexAnchor.js.snap | 4 +- 9 files changed, 1191 insertions(+), 763 deletions(-) create mode 100644 crates/biome_js_analyze/src/assists/source/organize_imports/legacy.rs create mode 100644 crates/biome_js_analyze/src/utils/restricted_glob.rs diff --git a/Cargo.lock b/Cargo.lock index 9cd4378c1f27..05b99955c80f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -750,6 +750,7 @@ dependencies = [ "biome_unicode_table", "bitvec", "enumflags2", + "globset", "insta", "natord", "regex", diff --git a/Cargo.toml b/Cargo.toml index 80400dac1628..5e6a026c6cf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -178,6 +178,7 @@ crossbeam = "0.8.4" dashmap = "6.1.0" enumflags2 = "0.7.10" getrandom = "0.2.15" +globset = "0.4.15" ignore = "0.4.23" indexmap = { version = "2.6.0", features = ["serde"] } insta = "1.40.0" diff --git a/crates/biome_js_analyze/Cargo.toml b/crates/biome_js_analyze/Cargo.toml index 064caedee302..455caf01b4ea 100644 --- a/crates/biome_js_analyze/Cargo.toml +++ b/crates/biome_js_analyze/Cargo.toml @@ -28,6 +28,7 @@ biome_suppression = { workspace = true } biome_unicode_table = { workspace = true } bitvec = "1.0.1" enumflags2 = { workspace = true } +globset = { workspace = true } natord = { workspace = true } regex = { workspace = true } roaring = "0.10.6" diff --git a/crates/biome_js_analyze/src/assists/source/organize_imports.rs 
b/crates/biome_js_analyze/src/assists/source/organize_imports.rs index dc325f765b26..b80e00b4bc7f 100644 --- a/crates/biome_js_analyze/src/assists/source/organize_imports.rs +++ b/crates/biome_js_analyze/src/assists/source/organize_imports.rs @@ -1,21 +1,15 @@ -use std::{cell::Cell, cmp::Ordering, collections::BTreeMap, iter, mem::take}; - use biome_analyze::{ context::RuleContext, declare_source_rule, ActionCategory, Ast, FixKind, Rule, SourceActionKind, }; use biome_console::markup; -use biome_js_factory::make; -use biome_js_syntax::{ - AnyJsImportClause, AnyJsModuleItem, AnyJsNamedImportSpecifier, JsImport, JsLanguage, JsModule, - JsSyntaxToken, JsSyntaxTrivia, TextRange, TriviaPieceKind, T, -}; -use biome_rowan::{ - chain_trivia_pieces, AstNode, AstNodeExt, AstNodeList, AstSeparatedList, BatchMutationExt, - SyntaxTriviaPiece, TokenText, TriviaPiece, -}; +use biome_deserialize::Deserializable; +use biome_deserialize_macros::Deserializable; +use biome_js_syntax::JsModule; +use biome_rowan::BatchMutationExt; -use crate::JsRuleAction; +use crate::{utils::restricted_glob::RestrictedGlob, JsRuleAction}; +pub mod legacy; pub mod util; declare_source_rule! { @@ -53,214 +47,23 @@ declare_source_rule! 
{ impl Rule for OrganizeImports { type Query = Ast; - type State = ImportGroups; + type State = State; type Signals = Option; - type Options = (); + type Options = Options; fn run(ctx: &RuleContext) -> Option { let root = ctx.query(); - let mut groups = Vec::new(); - let mut first_node = None; - let mut nodes = BTreeMap::new(); - - for item in root.items() { - let AnyJsModuleItem::JsImport(import) = item else { - // If we have pending nodes and encounter a non-import node, append the nodes to a new group - if let Some(first_node) = first_node.take() { - groups.push(ImportGroup { - first_node, - nodes: take(&mut nodes), - }); - } - continue; - }; - - let is_side_effect_import = matches!( - import.import_clause(), - Ok(AnyJsImportClause::JsImportBareClause(_)) - ); - if is_side_effect_import { - if let Some(first_node) = first_node.take() { - groups.push(ImportGroup { - first_node, - nodes: take(&mut nodes), - }); - } - // A side effect import creates its own import group - let mut nodes = BTreeMap::new(); - nodes.insert( - ImportKey(import.source_text().ok()?), - vec![ImportNode::from(import.clone())], - ); - groups.push(ImportGroup { - first_node: import.clone(), - nodes, - }); - continue; - } - - // If this is not the first import in the group, check for a group break - if has_empty_line(&import.import_token().ok()?.leading_trivia()) { - if let Some(first_node) = first_node.take() { - groups.push(ImportGroup { - first_node, - nodes: take(&mut nodes), - }); - } - } - - // If this is the first import in the group save the leading trivia - // and slot index - if first_node.is_none() { - first_node = Some(import.clone()); - } - - nodes - .entry(ImportKey(import.source_text().ok()?)) - .or_default() - .push(ImportNode::from(import)); - } - - // Flush the remaining nodes - if let Some(first_node) = first_node.take() { - groups.push(ImportGroup { first_node, nodes }); - } - - groups - .iter() - .any(|group| !group.is_sorted()) - .then_some(ImportGroups { groups }) + 
legacy::run(root).map(State::Legacy) } - fn action(ctx: &RuleContext, groups: &Self::State) -> Option { - let mut groups_iter = groups.groups.iter(); - let mut next_group = groups_iter.next().expect("state is empty"); - - let old_list = ctx.query().items(); - let mut new_list = Vec::new(); - - let mut items_iter = old_list.iter(); - let mut iter = (&mut items_iter).enumerate(); - - // Iterate other the nodes of the old list - while let Some((item_slot, item)) = iter.next() { - // If the current position in the old list is lower than the start - // of the new group, append the old node to the new list - if item_slot < next_group.first_node.syntax().index() { - new_list.push(item); - continue; - } - - // Extract the leading trivia for the whole group from the leading - // trivia for the import token of the first node in the group. If - // the trivia contains empty lines the leading trivia for the group - // comprise all trivia pieces coming before the empty line that's - // closest to the token. Otherwise the group leading trivia is - // created from all the newline and whitespace pieces on the first - // token before the first comment or skipped piece. 
- let group_first_token = next_group.first_node.import_token().ok()?; - let group_leading_trivia = group_first_token.leading_trivia(); - - let mut prev_newline = None; - let mut group_leading_trivia: Vec<_> = group_leading_trivia - .pieces() - .enumerate() - .rev() - .find_map(|(index, piece)| { - if piece.is_whitespace() { - return None; - } - - let is_newline = piece.is_newline(); - if let Some(first_newline) = prev_newline.filter(|_| is_newline) { - return Some(first_newline + 1); - } - - prev_newline = is_newline.then_some(index); - None - }) - .map_or_else( - || { - group_leading_trivia - .pieces() - .take_while(is_ascii_whitespace) - .collect() - }, - |length| group_leading_trivia.pieces().take(length).collect(), - ); - - let mut saved_leading_trivia = Vec::new(); - let group_leading_pieces = group_leading_trivia.len(); - - let nodes_iter = next_group - .nodes - .values() - // TODO: Try to merge nodes from the same source - .flat_map(|nodes| nodes.iter()) - .enumerate(); - - for (node_index, import_node) in nodes_iter { - // For each node in the group, pop an item from the old list - // iterator (ignoring `item` itself) and discard it - if node_index > 0 { - iter.next() - .unwrap_or_else(|| panic!("mising node {item_slot} {node_index}")); - } - - let first_token = import_node.node.import_token().ok()?; - let mut node = import_node.build_sorted_node(); - - if node_index == 0 && group_first_token != first_token { - // If this node was not previously in the leading position - // but is being moved there, replace its leading whitespace - // with the group's leading trivia - let group_leading_trivia = group_leading_trivia.drain(..); - let mut token_leading_trivia = first_token.leading_trivia().pieces().peekable(); - - // Save off the leading whitespace of the token to be - // reused by the import take the place of this node in the list - while let Some(piece) = token_leading_trivia.next_if(is_ascii_whitespace) { - saved_leading_trivia.push(piece); - } - - node = 
node.with_import_token(first_token.with_leading_trivia_pieces( - chain_trivia_pieces(group_leading_trivia, token_leading_trivia), - )); - } else if node_index > 0 && group_first_token == first_token { - // If this node used to be in the leading position but - // got moved, remove the group leading trivia from its - // first token - let saved_leading_trivia = saved_leading_trivia.drain(..); - let token_leading_trivia = first_token - .leading_trivia() - .pieces() - .skip(group_leading_pieces); - - node = node.with_import_token(first_token.with_leading_trivia_pieces( - chain_trivia_pieces(saved_leading_trivia, token_leading_trivia), - )); - } - - new_list.push(AnyJsModuleItem::JsImport(node)); + fn action(ctx: &RuleContext, state: &Self::State) -> Option { + let mut mutation = ctx.root().begin(); + match state { + State::Legacy(groups) => { + legacy::action(ctx.query(), groups, &mut mutation)?; } - - // Load the next group before moving on to the next item in the old - // list, breaking the loop if there a no remaining groups to insert - next_group = match groups_iter.next() { - Some(entry) => entry, - None => break, - }; + State::Modern => {} } - - // Append all remaining nodes to the new list if the loop performed an - // early exit after reaching the last group - new_list.extend(items_iter); - - let new_list = make::js_module_item_list(new_list); - - let mut mutation = ctx.root().begin(); - mutation.replace_node_discard_trivia(old_list, new_list); - Some(JsRuleAction::new( ActionCategory::Source(SourceActionKind::OrganizeImports), ctx.metadata().applicability(), @@ -271,455 +74,51 @@ impl Rule for OrganizeImports { } #[derive(Debug)] -pub struct ImportGroups { - /// The list of all the import groups in the file - groups: Vec, -} - -#[derive(Debug)] -struct ImportGroup { - /// The import that was at the start of the group before sorting - first_node: JsImport, - /// Multimap storing all the imports for each import source in the group, - /// sorted in natural order - 
nodes: BTreeMap>, -} - -impl ImportGroup { - /// Returns true if the nodes in the group are already sorted in the file - fn is_sorted(&self) -> bool { - // The imports are sorted if the text position of each node in the `BTreeMap` - // (sorted in natural order) is higher than the previous item in - // the sequence - let mut iter = self.nodes.values().flat_map(|nodes| nodes.iter()); - let Some(import_node) = iter.next() else { - return true; - }; - let mut previous_start = import_node.node.syntax().text_range().end(); - import_node.is_sorted() - && iter.all(|import_node| { - let start = import_node.node.syntax().text_range().end(); - let is_sorted = previous_start < start && import_node.is_sorted(); - previous_start = start; - is_sorted - }) - } -} - -#[derive(Debug)] -struct ImportNode { - /// The original `JsImport` node this import node was created from - node: JsImport, - /// The number of separators present in the named specifiers list of this node if it has one - separator_count: usize, - /// Map storing all the named import specifiers and their associated trailing separator, - /// sorted in natural order - specifiers: BTreeMap)>, -} - -impl From for ImportNode { - fn from(node: JsImport) -> Self { - let import_clause = node.import_clause().ok(); - - let mut separator_count = 0; - let specifiers = import_clause.and_then(|import_clause| { - let AnyJsImportClause::JsImportNamedClause(import_named_clause) = import_clause else { - return None; - }; - let named_import_specifiers = import_named_clause.named_specifiers().ok()?; - let mut result = BTreeMap::new(); - - for element in named_import_specifiers.specifiers().elements() { - let node = element.node.ok()?; - let key = node.imported_name()?.token_text_trimmed(); - - let trailing_separator = element.trailing_separator.ok()?; - separator_count += usize::from(trailing_separator.is_some()); - - result.insert(ImportKey(key), (node, trailing_separator)); - } - - Some(result) - }); - - Self { - node, - 
separator_count, - specifiers: specifiers.unwrap_or_default(), - } - } -} - -impl ImportNode { - /// Returns `true` if the named import specifiers of this import node are sorted - fn is_sorted(&self) -> bool { - let mut iter = self - .specifiers - .values() - .map(|(node, _)| node.syntax().text_range().start()); - let mut previous_start = iter.next().unwrap_or_default(); - iter.all(|start| { - let is_sorted = previous_start < start; - previous_start = start; - is_sorted - }) - } - - /// Build a clone of the original node this import node was created from with its import specifiers sorted - fn build_sorted_node(&self) -> JsImport { - let import = self.node.clone().detach(); - - let import_clause = import.import_clause(); - let Ok(AnyJsImportClause::JsImportNamedClause(import_named_clause)) = import_clause else { - return import; - }; - let Ok(old_specifiers) = import_named_clause.named_specifiers() else { - return import; - }; - - let element_count = self.specifiers.len(); - let last_element = element_count.saturating_sub(1); - let separator_count = self.separator_count.max(last_element); - let needs_newline: Cell>> = Cell::new(None); - - let items = self - .specifiers - .values() - .enumerate() - .map(|(index, (node, sep))| { - let is_last = index == last_element; - - let mut node = node.clone().detach(); - let Some(prev_token) = node.syntax().last_token() else { - return node; - }; - - if let Some(sep) = sep { - if is_last && separator_count == last_element { - // If this is the last item and we are removing its trailing separator, - // move the trailing trivia from the separator to the node - let next_token = - prev_token.append_trivia_pieces(sep.trailing_trivia().pieces()); - - node = node - .replace_token_discard_trivia(prev_token, next_token) - .expect("prev_token should be a child of node"); - } - } else if !is_last { - // If the node has no separator and this is not the last item, - // remove the trailing trivia since it will get cloned on the inserted 
separator - let next_token = prev_token.with_trailing_trivia([]); - node = node - .replace_token_discard_trivia(prev_token, next_token) - .expect("prev_token should be a child of node"); - } - - // Check if the last separator we emitted ended with a single-line comment - if let Some(newline_source) = needs_newline.take() { - if let Some(first_token) = node.syntax().first_token() { - if let Some(new_token) = - prepend_leading_newline(&first_token, newline_source) - { - node = node - .replace_token_discard_trivia(first_token, new_token) - .expect("first_token should be a child of node"); - } - } - } - - node - }); - - let separators = self - .specifiers - .values() - .take(separator_count) - .map(|(node, sep)| { - // If this entry has an associated separator, reuse it - let (token, will_need_newline) = if let Some(sep) = sep { - // If the last trivia piece for the separator token is a single-line comment, - // signal to the items iterator it will need to prepend a newline to the leading - // trivia of the next node - let will_need_newline = sep - .trailing_trivia() - .last() - .map_or(false, |piece| piece.kind().is_single_line_comment()); - - (sep.clone(), will_need_newline) - } else { - // If the node we're attaching this separator to has no trailing trivia, just create a simple comma token - let last_trailing_trivia = match node.syntax().last_trailing_trivia() { - Some(trivia) if !trivia.is_empty() => trivia, - _ => { - let sep = make::token(T![,]); - return if node.syntax().has_leading_newline() { - sep - } else { - sep.with_trailing_trivia([(TriviaPieceKind::Whitespace, " ")]) - }; - } - }; - - // Otherwise we need to clone the trailing trivia from the node to the separator - // (the items iterator should have already filtered this trivia when it previously - // emitted the node) - let mut text = String::from(","); - let mut trailing = Vec::with_capacity(last_trailing_trivia.pieces().len()); - - let mut will_need_newline = false; - for piece in 
last_trailing_trivia.pieces() { - text.push_str(piece.text()); - trailing.push(TriviaPiece::new(piece.kind(), piece.text_len())); - will_need_newline = - matches!(piece.kind(), TriviaPieceKind::SingleLineComment); - } - - let token = JsSyntaxToken::new_detached(T![,], &text, [], trailing); - (token, will_need_newline) - }; - - // If the last trivia piece was a single-line comment, signal to the items iterator - // it will need to prepend a newline to the leading trivia of the next node, and provide - // it the token that followed this separator in the original source so the newline trivia - // can be cloned from there - let newline_source = - will_need_newline.then(|| sep.as_ref().and_then(|token| token.next_token())); - - needs_newline.set(newline_source); - - token - }); - - let mut new_specifiers = old_specifiers - .clone() - .detach() - .with_specifiers(make::js_named_import_specifier_list(items, separators)); - - // If the separators iterator has a pending newline, prepend it to closing curly token - if let Some(newline_source) = needs_newline.into_inner() { - let new_token = new_specifiers - .r_curly_token() - .ok() - .and_then(|token| prepend_leading_newline(&token, newline_source)); - - if let Some(new_token) = new_token { - new_specifiers = new_specifiers.with_r_curly_token(new_token); - } - } - - import - .replace_node_discard_trivia(old_specifiers, new_specifiers) - .expect("old_specifiers should be a child of import") - } -} - -/// Return a clone of `prev_token` with a newline trivia piece prepended to its -/// leading trivia if it didn't have one already. 
This function will try to copy -/// the newline trivia piece from the leading trivia of `newline_source` if its set -fn prepend_leading_newline( - prev_token: &JsSyntaxToken, - newline_source: Option, -) -> Option { - // Check if this node already starts with a newline, - // if it does we don't need to prepend anything - let leading_trivia = prev_token.leading_trivia(); - let has_leading_newline = leading_trivia - .first() - .map_or(false, |piece| piece.is_newline()); - - if has_leading_newline { - return None; - } - - // Extract the leading newline from the `newline_source` token - let leading_newline = newline_source.and_then(|newline_source| { - let leading_piece = newline_source.leading_trivia().first()?; - if !leading_piece.is_newline() { - return None; - } - Some(leading_piece) - }); - - // Prepend a newline trivia piece to the node, either by copying the leading newline - // and whitespace from `newline_source`, or falling back to the "\n" character - let leading_newline = if let Some(leading_newline) = &leading_newline { - (leading_newline.kind(), leading_newline.text()) - } else { - (TriviaPieceKind::Newline, "\n") - }; - - let piece_count = 1 + leading_trivia.pieces().len(); - let mut iter = iter::once(leading_newline).chain(leading_trivia_iter(prev_token)); - - Some(prev_token.with_leading_trivia((0..piece_count).map(|_| iter.next().unwrap()))) -} - -/// Builds an iterator over the leading trivia pieces of a token -/// -/// The items of the iterator inherit their lifetime from the token, -/// rather than the trivia pieces themselves -fn leading_trivia_iter( - token: &JsSyntaxToken, -) -> impl ExactSizeIterator { - let token_text = token.text(); - let token_range = token.text_range(); - let trivia = token.leading_trivia(); - trivia.pieces().map(move |piece| { - let piece_range = piece.text_range(); - let range = TextRange::at(piece_range.start() - token_range.start(), piece_range.len()); - - let text = &token_text[range]; - assert_eq!(text, piece.text()); 
- - (piece.kind(), text) - }) -} - -#[derive(Debug)] -struct ImportKey(TokenText); - -impl Ord for ImportKey { - fn cmp(&self, other: &Self) -> Ordering { - let own_category = ImportCategory::from(self.0.text()); - let other_category = ImportCategory::from(other.0.text()); - if own_category != other_category { - return own_category.cmp(&other_category); - } - - // Sort imports using natural ordering - natord::compare(&self.0, &other.0) - } -} - -impl PartialOrd for ImportKey { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) +pub enum State { + Legacy(legacy::ImportGroups), + Modern, +} + +#[derive(Clone, Debug, Default, serde::Deserialize, Deserializable, serde::Serialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +#[serde(rename_all = "camelCase", deny_unknown_fields, default)] +pub struct Options { + legacy: bool, + import_groups: Box<[ImportGroup]>, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +#[serde(untagged)] +pub enum ImportGroup { + Predefined(PredefinedImportGroup), + Custom(RestrictedGlob), +} +impl Deserializable for ImportGroup { + fn deserialize( + value: &impl biome_deserialize::DeserializableValue, + name: &str, + diagnostics: &mut Vec, + ) -> Option { + Some( + if let Some(predefined) = Deserializable::deserialize(value, name, diagnostics) { + ImportGroup::Predefined(predefined) + } else { + ImportGroup::Custom(Deserializable::deserialize(value, name, diagnostics)?) + }, + ) } } -impl Eq for ImportKey {} - -impl PartialEq for ImportKey { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -/// Imports get sorted by categories before being sorted on natural order. -/// -/// The rationale for this is that imports "further away" from the source file -/// are listed before imports closer to the source file. 
-#[derive(Eq, Ord, PartialEq, PartialOrd)] -enum ImportCategory { - /// Anything with an explicit `bun:` prefix. +#[derive(Clone, Debug, serde::Deserialize, Deserializable, Eq, PartialEq, serde::Serialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +pub enum PredefinedImportGroup { + #[serde(rename = ":blank-line:")] + BlankLine, + #[serde(rename = ":bun:")] Bun, - /// Anything with an explicit `node:` prefix, or one of the recognized - /// Node built-ins, such `"fs"`, `"child_process"`, etc.. - NodeBuiltin, - /// NPM dependencies with an explicit `npm:` prefix, such as supported by - /// Deno. - Npm, - /// Modules that contains the column `:` are usually considered "virtual modules". E.g. `astro:middleware` - /// - /// This modules are usually injected by the environment of the application, and usually present before any relative module. - VirtualModule, - /// Imports from an absolute URL such as supported by browsers. - Url, - /// Anything without explicit protocol specifier is assumed to be a library - /// import. Because we currently do not have configuration for this, this - /// may (incorrectly) include source imports through custom import mappings - /// as well. - Library, - /// Absolute file imports `/`. - Absolute, - /// Node allows specifying an import map with name prefixed with `#`. - /// See https://nodejs.org/api/packages.html#subpath-imports - SharpImport, - /// Relative file imports `./`. 
- Relative, -} - -impl From<&str> for ImportCategory { - fn from(value: &str) -> Self { - if value.starts_with('.') { - Self::Relative - } else if let Some((protocol, _)) = value.split_once(':') { - match protocol { - "bun" => Self::Bun, - "http" | "https" => Self::Url, - "node" => Self::NodeBuiltin, - "npm" => Self::Npm, - _ => Self::VirtualModule, - } - } else if value.starts_with('#') { - Self::SharpImport - } else if value.starts_with('/') { - Self::Absolute - } else if NODE_BUILTINS.binary_search(&value).is_ok() { - Self::NodeBuiltin - } else { - Self::Library - } - } -} - -/// Returns true is this trivia piece is "ASCII whitespace" (newline or whitespace) -fn is_ascii_whitespace(piece: &SyntaxTriviaPiece) -> bool { - piece.is_newline() || piece.is_whitespace() -} - -/// Returns true if the provided trivia contains an empty line (two consecutive newline pieces, ignoring whitespace) -fn has_empty_line(trivia: &JsSyntaxTrivia) -> bool { - let mut was_newline = false; - trivia - .pieces() - .filter(|piece| !piece.is_whitespace()) - .any(|piece| { - let prev_newline = was_newline; - was_newline = piece.is_newline(); - prev_newline && was_newline - }) -} - -/// Sorted array of Node builtin -const NODE_BUILTINS: &[&str] = &[ - "assert", - "buffer", - "child_process", - "cluster", - "console", - "constants", - "crypto", - "dgram", - "dns", - "domain", - "events", - "fs", - "http", - "https", - "module", - "net", - "os", - "path", - "punycode", - "querystring", - "readline", - "repl", - "stream", - "string_decoder", - "sys", - "timers", - "tls", - "tty", - "url", - "util", - "vm", - "zlib", -]; - -#[test] -fn test_order() { - for items in NODE_BUILTINS.windows(2) { - assert!(items[0] < items[1], "{} < {}", items[0], items[1]); - } + #[serde(rename = ":node:")] + Node, + #[serde(rename = ":types:")] + Types, } diff --git a/crates/biome_js_analyze/src/assists/source/organize_imports/legacy.rs b/crates/biome_js_analyze/src/assists/source/organize_imports/legacy.rs new 
file mode 100644 index 000000000000..3f5fd7df92c3 --- /dev/null +++ b/crates/biome_js_analyze/src/assists/source/organize_imports/legacy.rs @@ -0,0 +1,674 @@ +use std::{cell::Cell, cmp::Ordering, collections::BTreeMap, iter}; + +use biome_js_factory::make; +use biome_js_syntax::{ + AnyJsImportClause, AnyJsModuleItem, AnyJsNamedImportSpecifier, JsImport, JsLanguage, JsModule, + JsSyntaxToken, JsSyntaxTrivia, T, +}; +use biome_rowan::{ + chain_trivia_pieces, AstNode, AstNodeExt, AstNodeList, AstSeparatedList, BatchMutation, + SyntaxTriviaPiece, TextRange, TokenText, TriviaPiece, TriviaPieceKind, +}; + +pub(crate) fn run(root: &JsModule) -> Option { + let mut groups = Vec::new(); + let mut first_node = None; + let mut nodes = BTreeMap::new(); + + for item in root.items() { + let AnyJsModuleItem::JsImport(import) = item else { + // If we have pending nodes and encounter a non-import node, append the nodes to a new group + if let Some(first_node) = first_node.take() { + groups.push(ImportGroup { + first_node, + nodes: std::mem::take(&mut nodes), + }); + } + continue; + }; + + let is_side_effect_import = matches!( + import.import_clause(), + Ok(AnyJsImportClause::JsImportBareClause(_)) + ); + if is_side_effect_import { + if let Some(first_node) = first_node.take() { + groups.push(ImportGroup { + first_node, + nodes: std::mem::take(&mut nodes), + }); + } + // A side effect import creates its own import group + let mut nodes = BTreeMap::new(); + nodes.insert( + ImportKey(import.source_text().ok()?), + vec![ImportNode::from(import.clone())], + ); + groups.push(ImportGroup { + first_node: import.clone(), + nodes, + }); + continue; + } + + // If this is not the first import in the group, check for a group break + if has_empty_line(&import.import_token().ok()?.leading_trivia()) { + if let Some(first_node) = first_node.take() { + groups.push(ImportGroup { + first_node, + nodes: std::mem::take(&mut nodes), + }); + } + } + + // If this is the first import in the group save the 
leading trivia + // and slot index + if first_node.is_none() { + first_node = Some(import.clone()); + } + + nodes + .entry(ImportKey(import.source_text().ok()?)) + .or_default() + .push(ImportNode::from(import)); + } + + // Flush the remaining nodes + if let Some(first_node) = first_node.take() { + groups.push(ImportGroup { first_node, nodes }); + } + + groups + .iter() + .any(|group| !group.is_sorted()) + .then_some(ImportGroups { groups }) +} + +pub(crate) fn action( + root: &JsModule, + groups: &ImportGroups, + mutation: &mut BatchMutation, +) -> Option<()> { + let mut groups_iter = groups.groups.iter(); + let mut next_group = groups_iter.next().expect("state is empty"); + + let old_list = root.items(); + let mut new_list = Vec::new(); + + let mut items_iter = old_list.iter(); + let mut iter = (&mut items_iter).enumerate(); + + // Iterate other the nodes of the old list + while let Some((item_slot, item)) = iter.next() { + // If the current position in the old list is lower than the start + // of the new group, append the old node to the new list + if item_slot < next_group.first_node.syntax().index() { + new_list.push(item); + continue; + } + + // Extract the leading trivia for the whole group from the leading + // trivia for the import token of the first node in the group. If + // the trivia contains empty lines the leading trivia for the group + // comprise all trivia pieces coming before the empty line that's + // closest to the token. Otherwise the group leading trivia is + // created from all the newline and whitespace pieces on the first + // token before the first comment or skipped piece. 
+ let group_first_token = next_group.first_node.import_token().ok()?; + let group_leading_trivia = group_first_token.leading_trivia(); + + let mut prev_newline = None; + let mut group_leading_trivia: Vec<_> = group_leading_trivia + .pieces() + .enumerate() + .rev() + .find_map(|(index, piece)| { + if piece.is_whitespace() { + return None; + } + + let is_newline = piece.is_newline(); + if let Some(first_newline) = prev_newline.filter(|_| is_newline) { + return Some(first_newline + 1); + } + + prev_newline = is_newline.then_some(index); + None + }) + .map_or_else( + || { + group_leading_trivia + .pieces() + .take_while(is_ascii_whitespace) + .collect() + }, + |length| group_leading_trivia.pieces().take(length).collect(), + ); + + let mut saved_leading_trivia = Vec::new(); + let group_leading_pieces = group_leading_trivia.len(); + + let nodes_iter = next_group + .nodes + .values() + // TODO: Try to merge nodes from the same source + .flat_map(|nodes| nodes.iter()) + .enumerate(); + + for (node_index, import_node) in nodes_iter { + // For each node in the group, pop an item from the old list + // iterator (ignoring `item` itself) and discard it + if node_index > 0 { + iter.next() + .unwrap_or_else(|| panic!("missing node {item_slot} {node_index}")); + } + + let first_token = import_node.node.import_token().ok()?; + let mut node = import_node.build_sorted_node(); + + if node_index == 0 && group_first_token != first_token { + // If this node was not previously in the leading position + // but is being moved there, replace its leading whitespace + // with the group's leading trivia + let group_leading_trivia = group_leading_trivia.drain(..); + let mut token_leading_trivia = first_token.leading_trivia().pieces().peekable(); + + // Save off the leading whitespace of the token to be + // reused by the import take the place of this node in the list + while let Some(piece) = token_leading_trivia.next_if(is_ascii_whitespace) { + saved_leading_trivia.push(piece); + } + + node = 
node.with_import_token(first_token.with_leading_trivia_pieces( + chain_trivia_pieces(group_leading_trivia, token_leading_trivia), + )); + } else if node_index > 0 && group_first_token == first_token { + // If this node used to be in the leading position but + // got moved, remove the group leading trivia from its + // first token + let saved_leading_trivia = saved_leading_trivia.drain(..); + let token_leading_trivia = first_token + .leading_trivia() + .pieces() + .skip(group_leading_pieces); + + node = node.with_import_token(first_token.with_leading_trivia_pieces( + chain_trivia_pieces(saved_leading_trivia, token_leading_trivia), + )); + } + + new_list.push(AnyJsModuleItem::JsImport(node)); + } + + // Load the next group before moving on to the next item in the old + // list, breaking the loop if there a no remaining groups to insert + next_group = match groups_iter.next() { + Some(entry) => entry, + None => break, + }; + } + + // Append all remaining nodes to the new list if the loop performed an + // early exit after reaching the last group + new_list.extend(items_iter); + + let new_list = make::js_module_item_list(new_list); + + mutation.replace_node_discard_trivia(old_list, new_list); + + Some(()) +} + +#[derive(Debug)] +pub struct ImportGroups { + /// The list of all the import groups in the file + groups: Vec, +} + +#[derive(Debug)] +struct ImportGroup { + /// The import that was at the start of the group before sorting + first_node: JsImport, + /// Multimap storing all the imports for each import source in the group, + /// sorted in natural order + nodes: BTreeMap>, +} + +impl ImportGroup { + /// Returns true if the nodes in the group are already sorted in the file + fn is_sorted(&self) -> bool { + // The imports are sorted if the text position of each node in the `BTreeMap` + // (sorted in natural order) is higher than the previous item in + // the sequence + let mut iter = self.nodes.values().flat_map(|nodes| nodes.iter()); + let Some(import_node) = 
iter.next() else { + return true; + }; + let mut previous_start = import_node.node.syntax().text_range().end(); + import_node.is_sorted() + && iter.all(|import_node| { + let start = import_node.node.syntax().text_range().end(); + let is_sorted = previous_start < start && import_node.is_sorted(); + previous_start = start; + is_sorted + }) + } +} + +#[derive(Debug)] +struct ImportNode { + /// The original `JsImport` node this import node was created from + node: JsImport, + /// The number of separators present in the named specifiers list of this node if it has one + separator_count: usize, + /// Map storing all the named import specifiers and their associated trailing separator, + /// sorted in natural order + specifiers: BTreeMap)>, +} + +impl From for ImportNode { + fn from(node: JsImport) -> Self { + let import_clause = node.import_clause().ok(); + + let mut separator_count = 0; + let specifiers = import_clause.and_then(|import_clause| { + let AnyJsImportClause::JsImportNamedClause(import_named_clause) = import_clause else { + return None; + }; + let named_import_specifiers = import_named_clause.named_specifiers().ok()?; + let mut result = BTreeMap::new(); + + for element in named_import_specifiers.specifiers().elements() { + let node = element.node.ok()?; + let key = node.imported_name()?.token_text_trimmed(); + + let trailing_separator = element.trailing_separator.ok()?; + separator_count += usize::from(trailing_separator.is_some()); + + result.insert(ImportKey(key), (node, trailing_separator)); + } + + Some(result) + }); + + Self { + node, + separator_count, + specifiers: specifiers.unwrap_or_default(), + } + } +} + +impl ImportNode { + /// Returns `true` if the named import specifiers of this import node are sorted + fn is_sorted(&self) -> bool { + let mut iter = self + .specifiers + .values() + .map(|(node, _)| node.syntax().text_range().start()); + let mut previous_start = iter.next().unwrap_or_default(); + iter.all(|start| { + let is_sorted = 
previous_start < start; + previous_start = start; + is_sorted + }) + } + + /// Build a clone of the original node this import node was created from with its import specifiers sorted + fn build_sorted_node(&self) -> JsImport { + let import = self.node.clone().detach(); + + let import_clause = import.import_clause(); + let Ok(AnyJsImportClause::JsImportNamedClause(import_named_clause)) = import_clause else { + return import; + }; + let Ok(old_specifiers) = import_named_clause.named_specifiers() else { + return import; + }; + + let element_count = self.specifiers.len(); + let last_element = element_count.saturating_sub(1); + let separator_count = self.separator_count.max(last_element); + let needs_newline: Cell>> = Cell::new(None); + + let items = self + .specifiers + .values() + .enumerate() + .map(|(index, (node, sep))| { + let is_last = index == last_element; + + let mut node = node.clone().detach(); + let Some(prev_token) = node.syntax().last_token() else { + return node; + }; + + if let Some(sep) = sep { + if is_last && separator_count == last_element { + // If this is the last item and we are removing its trailing separator, + // move the trailing trivia from the separator to the node + let next_token = + prev_token.append_trivia_pieces(sep.trailing_trivia().pieces()); + + node = node + .replace_token_discard_trivia(prev_token, next_token) + .expect("prev_token should be a child of node"); + } + } else if !is_last { + // If the node has no separator and this is not the last item, + // remove the trailing trivia since it will get cloned on the inserted separator + let next_token = prev_token.with_trailing_trivia([]); + node = node + .replace_token_discard_trivia(prev_token, next_token) + .expect("prev_token should be a child of node"); + } + + // Check if the last separator we emitted ended with a single-line comment + if let Some(newline_source) = needs_newline.take() { + if let Some(first_token) = node.syntax().first_token() { + if let Some(new_token) = + 
prepend_leading_newline(&first_token, newline_source) + { + node = node + .replace_token_discard_trivia(first_token, new_token) + .expect("first_token should be a child of node"); + } + } + } + + node + }); + + let separators = self + .specifiers + .values() + .take(separator_count) + .map(|(node, sep)| { + // If this entry has an associated separator, reuse it + let (token, will_need_newline) = if let Some(sep) = sep { + // If the last trivia piece for the separator token is a single-line comment, + // signal to the items iterator it will need to prepend a newline to the leading + // trivia of the next node + let will_need_newline = sep + .trailing_trivia() + .last() + .map_or(false, |piece| piece.kind().is_single_line_comment()); + + (sep.clone(), will_need_newline) + } else { + // If the node we're attaching this separator to has no trailing trivia, just create a simple comma token + let last_trailing_trivia = match node.syntax().last_trailing_trivia() { + Some(trivia) if !trivia.is_empty() => trivia, + _ => { + let sep = make::token(T![,]); + return if node.syntax().has_leading_newline() { + sep + } else { + sep.with_trailing_trivia([(TriviaPieceKind::Whitespace, " ")]) + }; + } + }; + + // Otherwise we need to clone the trailing trivia from the node to the separator + // (the items iterator should have already filtered this trivia when it previously + // emitted the node) + let mut text = String::from(","); + let mut trailing = Vec::with_capacity(last_trailing_trivia.pieces().len()); + + let mut will_need_newline = false; + for piece in last_trailing_trivia.pieces() { + text.push_str(piece.text()); + trailing.push(TriviaPiece::new(piece.kind(), piece.text_len())); + will_need_newline = + matches!(piece.kind(), TriviaPieceKind::SingleLineComment); + } + + let token = JsSyntaxToken::new_detached(T![,], &text, [], trailing); + (token, will_need_newline) + }; + + // If the last trivia piece was a single-line comment, signal to the items iterator + // it will need 
to prepend a newline to the leading trivia of the next node, and provide + // it the token that followed this separator in the original source so the newline trivia + // can be cloned from there + let newline_source = + will_need_newline.then(|| sep.as_ref().and_then(|token| token.next_token())); + + needs_newline.set(newline_source); + + token + }); + + let mut new_specifiers = old_specifiers + .clone() + .detach() + .with_specifiers(make::js_named_import_specifier_list(items, separators)); + + // If the separators iterator has a pending newline, prepend it to closing curly token + if let Some(newline_source) = needs_newline.into_inner() { + let new_token = new_specifiers + .r_curly_token() + .ok() + .and_then(|token| prepend_leading_newline(&token, newline_source)); + + if let Some(new_token) = new_token { + new_specifiers = new_specifiers.with_r_curly_token(new_token); + } + } + + import + .replace_node_discard_trivia(old_specifiers, new_specifiers) + .expect("old_specifiers should be a child of import") + } +} + +/// Return a clone of `prev_token` with a newline trivia piece prepended to its +/// leading trivia if it didn't have one already. 
This function will try to copy +/// the newline trivia piece from the leading trivia of `newline_source` if its set +fn prepend_leading_newline( + prev_token: &JsSyntaxToken, + newline_source: Option, +) -> Option { + // Check if this node already starts with a newline, + // if it does we don't need to prepend anything + let leading_trivia = prev_token.leading_trivia(); + let has_leading_newline = leading_trivia + .first() + .map_or(false, |piece| piece.is_newline()); + + if has_leading_newline { + return None; + } + + // Extract the leading newline from the `newline_source` token + let leading_newline = newline_source.and_then(|newline_source| { + let leading_piece = newline_source.leading_trivia().first()?; + if !leading_piece.is_newline() { + return None; + } + Some(leading_piece) + }); + + // Prepend a newline trivia piece to the node, either by copying the leading newline + // and whitespace from `newline_source`, or falling back to the "\n" character + let leading_newline = if let Some(leading_newline) = &leading_newline { + (leading_newline.kind(), leading_newline.text()) + } else { + (TriviaPieceKind::Newline, "\n") + }; + + let piece_count = 1 + leading_trivia.pieces().len(); + let mut iter = iter::once(leading_newline).chain(leading_trivia_iter(prev_token)); + + Some(prev_token.with_leading_trivia((0..piece_count).map(|_| iter.next().unwrap()))) +} + +/// Builds an iterator over the leading trivia pieces of a token +/// +/// The items of the iterator inherit their lifetime from the token, +/// rather than the trivia pieces themselves +fn leading_trivia_iter( + token: &JsSyntaxToken, +) -> impl ExactSizeIterator { + let token_text = token.text(); + let token_range = token.text_range(); + let trivia = token.leading_trivia(); + trivia.pieces().map(move |piece| { + let piece_range = piece.text_range(); + let range = TextRange::at(piece_range.start() - token_range.start(), piece_range.len()); + + let text = &token_text[range]; + assert_eq!(text, piece.text()); 
+ + (piece.kind(), text) + }) +} + +#[derive(Debug)] +struct ImportKey(TokenText); + +impl Ord for ImportKey { + fn cmp(&self, other: &Self) -> Ordering { + let own_category = ImportCategory::from(self.0.text()); + let other_category = ImportCategory::from(other.0.text()); + if own_category != other_category { + return own_category.cmp(&other_category); + } + + // Sort imports using natural ordering + natord::compare(&self.0, &other.0) + } +} + +impl PartialOrd for ImportKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for ImportKey {} + +impl PartialEq for ImportKey { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +/// Imports get sorted by categories before being sorted on natural order. +/// +/// The rationale for this is that imports "further away" from the source file +/// are listed before imports closer to the source file. +#[derive(Eq, Ord, PartialEq, PartialOrd)] +enum ImportCategory { + /// Anything with an explicit `bun:` prefix. + Bun, + /// Anything with an explicit `node:` prefix, or one of the recognized + /// Node built-ins, such `"fs"`, `"child_process"`, etc.. + NodeBuiltin, + /// NPM dependencies with an explicit `npm:` prefix, such as supported by + /// Deno. + Npm, + /// Modules that contains the column `:` are usually considered "virtual modules". E.g. `astro:middleware` + /// + /// This modules are usually injected by the environment of the application, and usually present before any relative module. + VirtualModule, + /// Imports from an absolute URL such as supported by browsers. + Url, + /// Anything without explicit protocol specifier is assumed to be a library + /// import. Because we currently do not have configuration for this, this + /// may (incorrectly) include source imports through custom import mappings + /// as well. + Library, + /// Absolute file imports `/`. + Absolute, + /// Node allows specifying an import map with name prefixed with `#`. 
+    /// See https://nodejs.org/api/packages.html#subpath-imports
+    SharpImport,
+    /// Relative file imports `./`.
+    Relative,
+}
+
+impl From<&str> for ImportCategory {
+    fn from(value: &str) -> Self {
+        if value.starts_with('.') {
+            Self::Relative
+        } else if let Some((protocol, _)) = value.split_once(':') {
+            match protocol {
+                "bun" => Self::Bun,
+                "http" | "https" => Self::Url,
+                "node" => Self::NodeBuiltin,
+                "npm" => Self::Npm,
+                _ => Self::VirtualModule,
+            }
+        } else if value.starts_with('#') {
+            Self::SharpImport
+        } else if value.starts_with('/') {
+            Self::Absolute
+        } else if NODE_BUILTINS.binary_search(&value).is_ok() {
+            Self::NodeBuiltin
+        } else {
+            Self::Library
+        }
+    }
+}
+
+/// Returns true if this trivia piece is "ASCII whitespace" (newline or whitespace)
+fn is_ascii_whitespace(piece: &SyntaxTriviaPiece<JsLanguage>) -> bool {
+    piece.is_newline() || piece.is_whitespace()
+}
+
+/// Returns true if the provided trivia contains an empty line (two consecutive newline pieces, ignoring whitespace)
+fn has_empty_line(trivia: &JsSyntaxTrivia) -> bool {
+    let mut was_newline = false;
+    trivia
+        .pieces()
+        .filter(|piece| !piece.is_whitespace())
+        .any(|piece| {
+            let prev_newline = was_newline;
+            was_newline = piece.is_newline();
+            prev_newline && was_newline
+        })
+}
+
+/// Sorted array of Node builtin
+const NODE_BUILTINS: &[&str] = &[
+    "assert",
+    "buffer",
+    "child_process",
+    "cluster",
+    "console",
+    "constants",
+    "crypto",
+    "dgram",
+    "dns",
+    "domain",
+    "events",
+    "fs",
+    "http",
+    "https",
+    "module",
+    "net",
+    "os",
+    "path",
+    "punycode",
+    "querystring",
+    "readline",
+    "repl",
+    "stream",
+    "string_decoder",
+    "sys",
+    "timers",
+    "tls",
+    "tty",
+    "url",
+    "util",
+    "vm",
+    "zlib",
+];
+
+#[test]
+fn test_order() {
+    for items in NODE_BUILTINS.windows(2) {
+        assert!(items[0] < items[1], "{} < {}", items[0], items[1]);
+    }
+}
diff --git a/crates/biome_js_analyze/src/utils.rs b/crates/biome_js_analyze/src/utils.rs
index 1bd8375a4df1..7c1a4693212a
100644 --- a/crates/biome_js_analyze/src/utils.rs +++ b/crates/biome_js_analyze/src/utils.rs @@ -4,6 +4,7 @@ use std::iter; pub mod batch; pub mod rename; +pub mod restricted_glob; pub mod restricted_regex; #[cfg(test)] pub mod tests; diff --git a/crates/biome_js_analyze/src/utils/restricted_glob.rs b/crates/biome_js_analyze/src/utils/restricted_glob.rs new file mode 100644 index 000000000000..677aea541483 --- /dev/null +++ b/crates/biome_js_analyze/src/utils/restricted_glob.rs @@ -0,0 +1,261 @@ +use biome_rowan::{TextRange, TextSize}; + +/// A restricted glob pattern only supports the following syntaxes: +/// +/// - star `*` that matches zero or more characters inside a path segment +/// - globstar `**` that matches zero or more path segments +/// - Use `\*` to escape `*` +/// - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`. +/// These characters are reserved for future use. +/// - `!` must be escaped if it is the first character of the pattern +/// +/// A path segment is delimited by path separator `/` or the start/end of the path. +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(try_from = "String", into = "String")] +pub struct RestrictedGlob(globset::GlobMatcher); +impl RestrictedGlob { + /// Tests whether the given path matches this pattern or not. + pub fn is_match(&self, path: impl AsRef) -> bool { + self.0.is_match(path) + } + + /// Tests whether the given path matches this pattern or not. 
+ pub fn is_match_candidate(&self, path: &CandidatePath<'_>) -> bool { + self.0.is_match_candidate(&path.0) + } +} +impl std::fmt::Display for RestrictedGlob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let repr = self.0.glob().to_string(); + f.write_str(&repr) + } +} +impl From for String { + fn from(value: RestrictedGlob) -> Self { + value.to_string() + } +} +impl std::str::FromStr for RestrictedGlob { + type Err = RestrictedGlobError; + fn from_str(value: &str) -> Result { + validate_restricted_glob(value)?; + let mut glob_builder = globset::GlobBuilder::new(value); + // Allow escaping with `\` on all platforms. + glob_builder.backslash_escape(true); + // Only `**` can match `/` + glob_builder.literal_separator(true); + match glob_builder.build() { + Ok(glob) => Ok(RestrictedGlob(glob.compile_matcher())), + Err(error) => Err(RestrictedGlobError::Generic( + error.kind().to_string().into_boxed_str(), + )), + } + } +} +impl TryFrom for RestrictedGlob { + type Error = RestrictedGlobError; + fn try_from(value: String) -> Result { + value.parse() + } +} +// We use a custom impl to precisely report the location of the error. 
+impl biome_deserialize::Deserializable for RestrictedGlob {
+    fn deserialize(
+        value: &impl biome_deserialize::DeserializableValue,
+        name: &str,
+        diagnostics: &mut Vec<biome_deserialize::DeserializationDiagnostic>,
+    ) -> Option<Self> {
+        let glob = String::deserialize(value, name, diagnostics)?;
+        match glob.parse() {
+            Ok(glob) => Some(glob),
+            Err(error) => {
+                let range = value.range();
+                let range = error.index().map_or(range, |index| {
+                    TextRange::at(range.start() + TextSize::from(1 + index), 1u32.into())
+                });
+                diagnostics.push(
+                    biome_deserialize::DeserializationDiagnostic::new(format_args!("{error}"))
+                        .with_range(range),
+                );
+                None
+            }
+        }
+    }
+}
+#[cfg(feature = "schemars")]
+impl schemars::JsonSchema for RestrictedGlob {
+    fn schema_name() -> String {
+        "Regex".to_string()
+    }
+
+    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
+        String::json_schema(gen)
+    }
+}
+
+/// A candidate path for matching.
+///
+/// Constructing candidates has a very small cost associated with it, so
+/// callers may find it beneficial to amortize that cost when matching a single
+/// path against multiple globs or sets of globs.
+pub struct CandidatePath<'a>(globset::Candidate<'a>);
+impl<'a> CandidatePath<'a> {
+    /// Create a new candidate for matching from the given path.
+    pub fn new(path: &'a impl AsRef<std::path::Path>) -> Self {
+        Self(globset::Candidate::new(path))
+    }
+}
+
+#[derive(Debug)]
+pub enum RestrictedGlobError {
+    Regular {
+        kind: RestrictedGlobErrorKind,
+        index: u32,
+    },
+    /// Error caused by a third-party module.
+    Generic(Box<str>),
+}
+impl RestrictedGlobError {
+    /// Returns the index in the glob where the error is located.
+    pub fn index(&self) -> Option<u32> {
+        match self {
+            Self::Regular { index, .. } => Some(*index),
+            Self::Generic(_) => None,
+        }
+    }
+}
+impl std::error::Error for RestrictedGlobError {}
+impl std::fmt::Display for RestrictedGlobError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Regular { kind, ..
} => write!(f, "{kind}"), + Self::Generic(desc) => write!(f, "{desc}"), + } + } +} + +#[derive(Debug, Eq, PartialEq)] +pub enum RestrictedGlobErrorKind { + /// Occurs when an unescaped '\' is found at the end of a glob. + DanglingEscape, + /// Occurs when an invalid escape is found. + /// If the character is not set, then it is an invalid UTF-8 character. + InvalidEscape(char), + UnsupportedAlternates, + UnsupportedCharacterClass, + UnsupportedAnyCharacter, + UnsupportedNegation, +} +impl std::fmt::Display for RestrictedGlobErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let desc = match self { + Self::DanglingEscape => "Unterminated escape sequence.", + Self::InvalidEscape(c) => { + return write!(f, "The escape sequence `\\{c}` is not supported."); + } + Self::UnsupportedAlternates => { + r"Alternates `{}` are not supported. Use `\{` and `\}` to escape the characters." + } + Self::UnsupportedCharacterClass => { + r"Character class `[]` are not supported. Use `\[` and `\]` to escape the characters." + } + Self::UnsupportedAnyCharacter => { + r"`?` matcher is not supported. Use `\?` to escape the character." + } + Self::UnsupportedNegation => { + r"Negated globs `!` are not supported. Use `\!` to escape the character." + } + }; + write!(f, "{desc}") + } +} + +/// Returns an error if `pattern` doesn't follow the restricted glob syntax. +fn validate_restricted_glob(pattern: &str) -> Result<(), RestrictedGlobError> { + let mut it = pattern.bytes().enumerate(); + while let Some((i, c)) = it.next() { + match c { + b'!' if i == 0 => { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::UnsupportedNegation, + index: i as u32, + }); + } + b'\\' => { + // Accept a restrictive set of escape sequence + if let Some((j, c)) = it.next() { + if !matches!(c, b'!' | b'*' | b'?' 
| b'{' | b'}' | b'[' | b']' | b'\\') { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::InvalidEscape( + // SAFETY: the index `j` starts a new character + // because it is preceded by the character `\\`. + pattern[j..].chars().next().expect("valid character"), + ), + index: i as u32, + }); + } + } else { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::DanglingEscape, + index: i as u32, + }); + } + } + b'?' => { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::UnsupportedAnyCharacter, + index: i as u32, + }); + } + b'[' | b']' => { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::UnsupportedCharacterClass, + index: i as u32, + }); + } + b'{' | b'}' => { + return Err(RestrictedGlobError::Regular { + kind: RestrictedGlobErrorKind::UnsupportedAlternates, + index: i as u32, + }); + } + _ => {} + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_restricted_glob() { + assert!(validate_restricted_glob("!*.js").is_err()); + assert!(validate_restricted_glob("*.[jt]s").is_err()); + assert!(validate_restricted_glob("*.{js,ts}").is_err()); + assert!(validate_restricted_glob("?*.js").is_err()); + assert!(validate_restricted_glob(r"\").is_err()); + assert!(validate_restricted_glob(r"\n").is_err()); + assert!(validate_restricted_glob(r"\😀").is_err()); + assert!(validate_restricted_glob("!").is_err()); + + assert!(validate_restricted_glob("*.js").is_ok()); + assert!(validate_restricted_glob("**/*.js").is_ok()); + assert!(validate_restricted_glob(r"\*").is_ok()); + assert!(validate_restricted_glob(r"\!").is_ok()); + } + + #[test] + fn test_restricted_regex() { + assert!(!"*.js" + .parse::() + .unwrap() + .is_match("file/path.js")); + + assert!("**/*.js" + .parse::() + .unwrap() + .is_match("file/path.js")); + } +} diff --git a/crates/biome_js_analyze/src/utils/restricted_regex.rs 
b/crates/biome_js_analyze/src/utils/restricted_regex.rs index 775c3805cc75..7241fd3e3a71 100644 --- a/crates/biome_js_analyze/src/utils/restricted_regex.rs +++ b/crates/biome_js_analyze/src/utils/restricted_regex.rs @@ -1,6 +1,7 @@ use std::{ops::Deref, str::FromStr}; -use biome_deserialize_macros::Deserializable; +use biome_deserialize::DeserializationDiagnostic; +use biome_rowan::{TextRange, TextSize}; /// A restricted regular expression only supports widespread syntaxes: /// @@ -20,18 +21,10 @@ use biome_deserialize_macros::Deserializable; /// and regular string escape characters `\f`, `\n`, `\r`, `\t`, `\v` /// /// A restricted regular expression is implicitly delimited by the anchors `^` and `$`. -#[derive(Clone, Debug, Deserializable, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] #[serde(try_from = "String", into = "String")] pub struct RestrictedRegex(regex::Regex); -impl Deref for RestrictedRegex { - type Target = regex::Regex; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - impl RestrictedRegex { /// Returns the original string of this regex. 
pub fn as_str(&self) -> &str { @@ -42,6 +35,14 @@ impl RestrictedRegex { } } +impl Deref for RestrictedRegex { + type Target = regex::Regex; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + impl std::fmt::Display for RestrictedRegex { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(self.as_str()) @@ -55,22 +56,48 @@ impl From for String { } impl FromStr for RestrictedRegex { - type Err = regex::Error; + type Err = RestrictedRegexError; fn from_str(value: &str) -> Result { - is_restricted_regex(value)?; - regex::Regex::new(&format!("^(?:{value})$")).map(RestrictedRegex) + validate_restricted_regex(value)?; + regex::Regex::new(&format!("^(?:{value})$")) + .map(RestrictedRegex) + .map_err(|error| RestrictedRegexError { error, index: None }) } } impl TryFrom for RestrictedRegex { - type Error = regex::Error; + type Error = RestrictedRegexError; fn try_from(value: String) -> Result { value.parse() } } +// We use a custom impl to precisely report the location of the error. +impl biome_deserialize::Deserializable for RestrictedRegex { + fn deserialize( + value: &impl biome_deserialize::DeserializableValue, + name: &str, + diagnostics: &mut Vec, + ) -> Option { + let regex = String::deserialize(value, name, diagnostics)?; + match regex.parse() { + Ok(regex) => Some(regex), + Err(error) => { + let range = value.range(); + let range = error.index().map_or(range, |index| { + TextRange::at(range.start() + TextSize::from(1 + index), 1u32.into()) + }); + diagnostics.push( + DeserializationDiagnostic::new(format_args!("{error}")).with_range(range), + ); + None + } + } + } +} + #[cfg(feature = "schemars")] impl schemars::JsonSchema for RestrictedRegex { fn schema_name() -> String { @@ -90,17 +117,42 @@ impl PartialEq for RestrictedRegex { } } -/// Rteurns an error if `pattern` doesn't follow the restricted regular expression syntax. 
-fn is_restricted_regex(pattern: &str) -> Result<(), regex::Error> { - let mut it = pattern.bytes(); +#[derive(Debug)] +pub struct RestrictedRegexError { + error: regex::Error, + index: Option, +} +impl RestrictedRegexError { + fn new(error: regex::Error, index: usize) -> Self { + Self { + error, + index: Some(index as u32), + } + } + + /// Returns the index in the glob where the error is located. + pub fn index(&self) -> Option { + self.index + } +} +impl std::error::Error for RestrictedRegexError {} +impl std::fmt::Display for RestrictedRegexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.error.fmt(f) + } +} + +/// Returns an error if `pattern` doesn't follow the restricted regular expression syntax. +fn validate_restricted_regex(pattern: &str) -> Result<(), RestrictedRegexError> { + let mut it = pattern.bytes().enumerate(); let mut is_in_char_class = false; - while let Some(c) = it.next() { + while let Some((i, c)) = it.next() { match c { b'\\' => { // Accept a restrictive set of escape sequence // We keep only escaped chars that behave identically // in unicode-enabled and unicode-disabled RegExes. - if let Some(c) = it.next() { + if let Some((_, c)) = it.next() { if !matches!( c, b'^' | b'|' @@ -119,30 +171,48 @@ fn is_restricted_regex(pattern: &str) -> Result<(), regex::Error> { | b'v' | b'\\' ) { - // SAFETY: safe because of the match - let c = unsafe { char::from_u32_unchecked(c as u32) }; - // Escape sequences https://docs.rs/regex/latest/regex/#escape-sequences - // and Perl char classes https://docs.rs/regex/latest/regex/#perl-character-classes-unicode-friendly - return Err(regex::Error::Syntax(format!( - "Escape sequence \\{c} is not supported." 
- ))); + if c.is_ascii() { + // SAFETY: `c` is ASCIIaccording to the conditional + let c = c as char; + // Escape sequences https://docs.rs/regex/latest/regex/#escape-sequences + // and Perl char classes https://docs.rs/regex/latest/regex/#perl-character-classes-unicode-friendly + return Err(RestrictedRegexError::new( + regex::Error::Syntax(format!( + "Escape sequence \\{c} is not supported." + )), + i, + )); + } else { + return Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Escape sequence cannot contain a multi-byte character." + .to_string(), + ), + i, + )); + } } } else { - return Err(regex::Error::Syntax( - r"`\` should be followed by a character.".to_string(), + return Err(RestrictedRegexError::new( + regex::Error::Syntax(r"`\` should be followed by a character.".to_string()), + i, )); } } b'^' | b'$' if !is_in_char_class => { // Anchors are implicit and always present in a restricted regex - return Err(regex::Error::Syntax( - "Anchors `^` and `$` are not supported. They are implciitly present." - .to_string(), + return Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Anchors `^` and `$` are not supported. They are implciitly present." + .to_string(), + ), + i, )); } b'[' if is_in_char_class => { - return Err(regex::Error::Syntax( - "Nested character class are not supported.".to_string(), + return Err(RestrictedRegexError::new( + regex::Error::Syntax("Nested character class are not supported.".to_string()), + i, )); } b'[' => { @@ -152,61 +222,80 @@ fn is_restricted_regex(pattern: &str) -> Result<(), regex::Error> { is_in_char_class = false; } b'&' | b'~' | b'-' if is_in_char_class => { - if it.next() == Some(c) { - return Err(regex::Error::Syntax( - "Character class operator `&&`, `~~`, `--` are not supported.".to_string(), + if it.next().is_some_and(|(_, x)| x == c) { + return Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Character class operator `&&`, `~~`, `--` are not supported." 
+ .to_string(), + ), + i, )); } } - b'(' if !is_in_char_class => match it.next() { - Some(b'[') => { - is_in_char_class = true; - } - Some(b'?') => match it.next() { - Some(b'P' | b'=' | b'!' | b'<') => { - return if c == b'P' - || (c == b'<' && !matches!(it.next(), Some(b'=' | b'!'))) - { - Err(regex::Error::Syntax( - "Named groups `(?)` are not supported.".to_string(), - )) - } else { - Err(regex::Error::Syntax( + b'(' if !is_in_char_class => { + match it.next() { + Some((_, b'[')) => { + is_in_char_class = true; + } + Some((_, b'?')) => match it.next() { + Some((i, b'P' | b'=' | b'!' | b'<')) => { + return if c == b'P' + || (c == b'<' && !matches!(it.next(), Some((_, b'=' | b'!')))) + { + Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Named groups `(?)` are not supported.".to_string(), + ), + i, + )) + } else { + Err(RestrictedRegexError::new(regex::Error::Syntax( "Assertions `(?P)`, `(?=)`, `(?!)`,`(?<)` are not supported." .to_string(), - )) - }; - } - Some(b':') => {} - c => { - let mut current = c; - while matches!(current, Some(b'i' | b'm' | b's' | b'-')) { - current = it.next() + ), i)) + }; } - match current { - Some(b':') => {} - Some(b')') => { - return Err(regex::Error::Syntax( - "Group modifiers `(?flags)` are not supported.".to_string(), - )); + Some((_, b':')) => {} + c => { + let mut current = c; + while matches!(current, Some((_, b'i' | b'm' | b's' | b'-'))) { + current = it.next() } - Some(c) if c.is_ascii() => { - // SAFETY: `c` is ASCII according to the guard - let c = c as char; - return Err(regex::Error::Syntax(format!( - "Group flags `(?{c}:)` are not supported." - ))); - } - _ => { - return Err(regex::Error::Syntax( - "Unterminated non-capturing group.".to_string(), - )); + match current { + Some((_, b':')) => {} + Some((_, b')')) => { + return Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Group modifiers `(?flags)` are not supported." 
+ .to_string(), + ), + i, + )); + } + Some((i, c)) if c.is_ascii() => { + // SAFETY: `c` is ASCII according to the guard + let c = c as char; + return Err(RestrictedRegexError::new( + regex::Error::Syntax(format!( + "Group flags `(?{c}:)` are not supported." + )), + i, + )); + } + _ => { + return Err(RestrictedRegexError::new( + regex::Error::Syntax( + "Unterminated non-capturing group.".to_string(), + ), + i, + )); + } } } - } - }, - _ => {} - }, + }, + _ => {} + } + } _ => {} } } @@ -218,28 +307,29 @@ mod tests { use super::*; #[test] - fn test() { - assert!(is_restricted_regex("^a").is_err()); - assert!(is_restricted_regex("a$").is_err()); - assert!(is_restricted_regex(r"\").is_err()); - assert!(is_restricted_regex(r"\p{L}").is_err()); - assert!(is_restricted_regex(r"(?=a)").is_err()); - assert!(is_restricted_regex(r"(?!a)").is_err()); - assert!(is_restricted_regex(r"(?:a)").is_err()); - assert!(is_restricted_regex(r"[[:digit:]]").is_err()); - assert!(is_restricted_regex(r"[a[bc]d]").is_err()); - assert!(is_restricted_regex(r"[ab--a]").is_err()); - assert!(is_restricted_regex(r"[ab&&a]").is_err()); - assert!(is_restricted_regex(r"[ab~~a]").is_err()); - - assert!(is_restricted_regex("").is_ok()); - assert!(is_restricted_regex("abc").is_ok()); - assert!(is_restricted_regex("(?:a)(.+)z").is_ok()); - assert!(is_restricted_regex("(?ims:a)(.+)z").is_ok()); - assert!(is_restricted_regex("(?-ims:a)(.+)z").is_ok()); - assert!(is_restricted_regex("(?i-ms:a)(.+)z").is_ok()); - assert!(is_restricted_regex("[A-Z][^a-z]").is_ok()); - assert!(is_restricted_regex(r"\n\t\v\f").is_ok()); - assert!(is_restricted_regex("([^_])").is_ok()); + fn test_validate_restricted_regex() { + assert!(validate_restricted_regex("^a").is_err()); + assert!(validate_restricted_regex("a$").is_err()); + assert!(validate_restricted_regex(r"\").is_err()); + assert!(validate_restricted_regex(r"\p{L}").is_err()); + assert!(validate_restricted_regex(r"\😀").is_err()); + 
assert!(validate_restricted_regex(r"(?=a)").is_err()); + assert!(validate_restricted_regex(r"(?!a)").is_err()); + assert!(validate_restricted_regex(r"(?:a)").is_err()); + assert!(validate_restricted_regex(r"[[:digit:]]").is_err()); + assert!(validate_restricted_regex(r"[a[bc]d]").is_err()); + assert!(validate_restricted_regex(r"[ab--a]").is_err()); + assert!(validate_restricted_regex(r"[ab&&a]").is_err()); + assert!(validate_restricted_regex(r"[ab~~a]").is_err()); + + assert!(validate_restricted_regex("").is_ok()); + assert!(validate_restricted_regex("abc").is_ok()); + assert!(validate_restricted_regex("(?:a)(.+)z").is_ok()); + assert!(validate_restricted_regex("(?ims:a)(.+)z").is_ok()); + assert!(validate_restricted_regex("(?-ims:a)(.+)z").is_ok()); + assert!(validate_restricted_regex("(?i-ms:a)(.+)z").is_ok()); + assert!(validate_restricted_regex("[A-Z][^a-z]").is_ok()); + assert!(validate_restricted_regex(r"\n\t\v\f").is_ok()); + assert!(validate_restricted_regex("([^_])").is_ok()); } } diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap index a445fdcd1ad1..1fd2190ecf83 100644 --- a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap @@ -32,14 +32,14 @@ expression: invalidCustomRegexAnchor.js # Diagnostics ``` -invalidCustomRegexAnchor.options:14:18 deserialize ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +invalidCustomRegexAnchor.options:14:24 deserialize ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ × Anchors `^` and `$` are not supported. They are implciitly present. 12 │ "kind": "const" 13 │ }, > 14 │ "match": "(.*?)$", - │ ^^^^^^^^ + │ ^ 15 │ "formats": ["camelCase"] 16 │ }