From c9283d9259919260e4015eef9dfeab567f9a2a72 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Sat, 16 Apr 2022 12:26:35 +0200 Subject: [PATCH 1/3] perf(rome_js_formatter): Reduce the `String` allocations for Tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR reduces the number of `String` allocations necessary for `FormatElement::Token`s by making use of the observation that most tokens match the text of a `SyntaxToken`. For example, identifiers and punctuation tokens are kept by the formatter as is. This is even true for string literal tokens if they already use the right quotes. This is implemented by introducing a new `SyntaxTokenText` that is `Send + Sync` and allows referencing a slice in a `SyntaxToken` without worrying about the `&str`'s lifetime. The PR further extends `FormatElement::Token` to make use of the newly introduced `SyntaxTokenText`. This change reduces overall memory consumption and improves performance: ``` group format-element token ----- -------------- ----- formatter/checker.ts 1.04 250.2±5.03ms 10.4 MB/sec 1.00 239.5±1.76ms 10.9 MB/sec formatter/compiler.js 1.07 145.6±1.30ms 7.2 MB/sec 1.00 136.5±1.43ms 7.7 MB/sec formatter/d3.min.js 1.07 117.4±3.70ms 2.2 MB/sec 1.00 109.6±1.24ms 2.4 MB/sec formatter/dojo.js 1.03 7.4±0.15ms 9.2 MB/sec 1.00 7.2±0.03ms 9.5 MB/sec formatter/ios.d.ts 1.05 181.2±1.95ms 10.3 MB/sec 1.00 172.8±2.23ms 10.8 MB/sec formatter/jquery.min.js 1.02 29.1±0.55ms 2.8 MB/sec 1.00 28.5±0.07ms 2.9 MB/sec formatter/math.js 1.05 233.1±4.69ms 2.8 MB/sec 1.00 222.8±1.79ms 2.9 MB/sec formatter/parser.ts 1.03 5.3±0.15ms 9.2 MB/sec 1.00 5.1±0.01ms 9.5 MB/sec formatter/pixi.min.js 1.10 131.0±7.11ms 3.3 MB/sec 1.00 119.3±2.12ms 3.7 MB/sec formatter/react-dom.production.min.js 1.07 37.0±0.82ms 3.1 MB/sec 1.00 34.5±0.21ms 3.3 MB/sec formatter/react.production.min.js 1.08 1825.1±57.85µs 3.4 MB/sec 1.00 1683.8±30.49µs 3.7 MB/sec formatter/router.ts 1.02 3.7±0.09ms 16.2 MB/sec 
1.00 3.6±0.01ms 16.6 MB/sec formatter/tex-chtml-full.js 1.05 288.3±5.19ms 3.2 MB/sec 1.00 273.4±1.29ms 3.3 MB/sec formatter/three.min.js 1.11 155.7±3.79ms 3.8 MB/sec 1.00 139.7±1.76ms 4.2 MB/sec formatter/typescript.js 1.04 945.2±6.64ms 10.1 MB/sec 1.00 909.3±7.16ms 10.4 MB/sec formatter/vue.global.prod.js 1.07 49.1±1.49ms 2.5 MB/sec 1.00 45.8±0.20ms 2.6 MB/sec ``` --- crates/rome_css_syntax/src/lib.rs | 2 +- crates/rome_formatter/src/format_element.rs | 90 ++++++++++++++++--- crates/rome_formatter/src/printer.rs | 2 +- crates/rome_js_formatter/src/formatter.rs | 14 ++- .../src/utils/binary_like_expression.rs | 21 ++--- crates/rome_js_formatter/src/utils/mod.rs | 15 ++-- crates/rome_js_syntax/src/expr_ext.rs | 4 +- crates/rome_js_syntax/src/lib.rs | 2 +- crates/rome_rowan/src/cursor/node.rs | 10 +-- crates/rome_rowan/src/cursor/token.rs | 7 +- crates/rome_rowan/src/lib.rs | 6 +- crates/rome_rowan/src/syntax/node.rs | 6 +- crates/rome_rowan/src/syntax/token.rs | 5 ++ crates/rome_rowan/src/syntax/trivia.rs | 46 +++++++++- .../{syntax_text.rs => syntax_node_text.rs} | 42 ++++----- crates/rome_rowan/src/syntax_token_text.rs | 79 ++++++++++++++++ 16 files changed, 271 insertions(+), 80 deletions(-) rename crates/rome_rowan/src/{syntax_text.rs => syntax_node_text.rs} (91%) create mode 100644 crates/rome_rowan/src/syntax_token_text.rs diff --git a/crates/rome_css_syntax/src/lib.rs b/crates/rome_css_syntax/src/lib.rs index 688f0d745b4..d9102884ce8 100644 --- a/crates/rome_css_syntax/src/lib.rs +++ b/crates/rome_css_syntax/src/lib.rs @@ -4,7 +4,7 @@ mod syntax_node; pub use self::generated::*; pub use rome_rowan::{ - SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent, + SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent, }; pub use syntax_node::*; diff --git a/crates/rome_formatter/src/format_element.rs b/crates/rome_formatter/src/format_element.rs index 0b200311312..670e122d9f0 100644 --- 
a/crates/rome_formatter/src/format_element.rs +++ b/crates/rome_formatter/src/format_element.rs @@ -1,6 +1,8 @@ use crate::intersperse::{Intersperse, IntersperseFn}; -use crate::{format_elements, TextSize}; -use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments}; +use crate::{format_elements, TextRange, TextSize}; +use rome_rowan::{ + Language, SyntaxNode, SyntaxToken, SyntaxTokenText, SyntaxTriviaPieceComments, TextLen, +}; use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use std::ops::Deref; @@ -1058,7 +1060,7 @@ impl List { } impl Deref for List { - type Target = Vec; + type Target = [FormatElement]; fn deref(&self) -> &Self::Target { &self.content @@ -1126,6 +1128,13 @@ pub enum Token { // The position of the dynamic token in the unformatted source code source_position: TextSize, }, + // A token that is taken 1:1 from the source code + SyntaxTokenSlice { + /// The start position of the token in the unformatted source code + source_position: TextSize, + /// The token text + slice: SyntaxTokenText, + }, } impl Debug for Token { @@ -1135,6 +1144,11 @@ impl Debug for Token { match self { Token::Static { text } => write!(fmt, "StaticToken({:?})", text), Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text), + Token::SyntaxTokenSlice { + slice: token_text, .. + } => { + write!(fmt, "SyntaxTokenSlice({:?})", token_text) + } } } } @@ -1147,21 +1161,69 @@ impl Token { /// Create a token from a dynamic string and a range of the input source pub fn new_dynamic(text: String, position: TextSize) -> Self { - debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. 
Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); + Self::assert_no_newlines(&text); Self::Dynamic { text: text.into_boxed_str(), source_position: position, } } + /// Creates a token from a [Cow] that is a sub-slice over the text of a token. + /// + /// The `start` is the absolute start of the token in the source text. + /// + /// ## Returns + /// * [Token::Dynamic] if `text` is a [Cow::Owned] (text doesn't match syntax token text) + /// * [Token::SyntaxTokenSlice] if `text` is borrowed. Avoids allocating a new string. + pub fn from_syntax_token_cow_slice( + text: Cow, + token: &SyntaxToken, + start: TextSize, + ) -> Self { + Self::assert_no_newlines(&text); + + match text { + Cow::Owned(text) => Self::new_dynamic(text, start), + Cow::Borrowed(text) => { + let range = TextRange::at(start, text.text_len()); + debug_assert_eq!( + text, + &token.text()[range - token.text_range().start()], + "The borrowed string doesn't match the specified token substring" + ); + Token::new_syntax_token_slice(token, range) + } + } + } + + /// Creates a new [Token] with a text backed by the string of [SyntaxToken] + pub fn new_syntax_token_slice(token: &SyntaxToken, range: TextRange) -> Self { + let relative_range = range - token.text_range().start(); + let slice = token.token_text().slice(relative_range); + + Self::assert_no_newlines(&slice); + + Self::SyntaxTokenSlice { + slice, + source_position: range.start(), + } + } + + fn assert_no_newlines(text: &str) { + debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. 
Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); + } + /// Get the range of the input source covered by this token, /// or None if the token was synthesized by the formatter - pub fn source(&self) -> Option<&TextSize> { + pub fn source_position(&self) -> Option<&TextSize> { match self { Token::Static { .. } => None, Token::Dynamic { source_position, .. } => Some(source_position), + Token::SyntaxTokenSlice { + source_position, .. + } => Some(source_position), } } } @@ -1181,10 +1243,9 @@ impl From> for Token { impl<'a, L: Language> From<&'a SyntaxToken> for Token { fn from(token: &'a SyntaxToken) -> Self { - Self::new_dynamic( - token.text_trimmed().into(), - token.text_trimmed_range().start(), - ) + let trimmed_range = token.text_trimmed_range(); + + Self::new_syntax_token_slice(token, trimmed_range) } } @@ -1222,9 +1283,11 @@ pub fn normalize_newlines(text: &str, terminators: [char; N]) -> impl From> for Token { fn from(trivia: SyntaxTriviaPieceComments) -> Self { - Self::new_dynamic( - normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(), - trivia.text_range().start(), + let range = trivia.text_range(); + Token::from_syntax_token_cow_slice( + normalize_newlines(trivia.text().trim(), LINE_TERMINATORS), + &trivia.as_piece().token(), + range.start(), ) } } @@ -1235,6 +1298,9 @@ impl Deref for Token { match self { Token::Static { text } => text, Token::Dynamic { text, .. } => text, + Token::SyntaxTokenSlice { + slice: token_text, .. 
+ } => token_text.deref(), } } } diff --git a/crates/rome_formatter/src/printer.rs b/crates/rome_formatter/src/printer.rs index d8392d030f2..c07af4d9a51 100644 --- a/crates/rome_formatter/src/printer.rs +++ b/crates/rome_formatter/src/printer.rs @@ -171,7 +171,7 @@ impl<'a> Printer<'a> { self.state.pending_space = false; } - if let Some(source) = token.source() { + if let Some(source) = token.source_position() { self.state.source_markers.push(SourceMarker { source: *source, dest: TextSize::from(self.state.buffer.len() as u32), diff --git a/crates/rome_js_formatter/src/formatter.rs b/crates/rome_js_formatter/src/formatter.rs index 60375a89600..0052c074e32 100644 --- a/crates/rome_js_formatter/src/formatter.rs +++ b/crates/rome_js_formatter/src/formatter.rs @@ -428,12 +428,9 @@ impl Formatter { let skipped_trivia_range = skipped_trivia_range.expect("Only call this method for leading trivia containing at least one skipped token trivia."); // Format the skipped token trivia range - // Compute the offsets relative to the tokens text - let relative_skipped_range = skipped_trivia_range - token.text_range().start(); - let text = &token.text()[relative_skipped_range]; - elements.push(FormatElement::from(Token::new_dynamic( - text.to_string(), - skipped_trivia_range.start(), + elements.push(FormatElement::from(Token::new_syntax_token_slice( + token, + skipped_trivia_range, ))); // `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except @@ -647,8 +644,9 @@ impl Formatter { } fn trivia_token(piece: SyntaxTriviaPiece) -> Token { - Token::new_dynamic( - normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(), + Token::from_syntax_token_cow_slice( + normalize_newlines(piece.text(), LINE_TERMINATORS), + &piece.token(), piece.text_range().start(), ) } diff --git a/crates/rome_js_formatter/src/utils/binary_like_expression.rs b/crates/rome_js_formatter/src/utils/binary_like_expression.rs index e16f20ade68..6f4db87faf6 100644 --- 
a/crates/rome_js_formatter/src/utils/binary_like_expression.rs +++ b/crates/rome_js_formatter/src/utils/binary_like_expression.rs @@ -210,23 +210,16 @@ fn format_with_or_without_parenthesis( }; let result = if operation_is_higher { - let formatted = if node.has_comments_direct() { - let (leading, content, trailing) = formatted_node.split_trivia(); - format_elements![ - leading, - group_elements(format_elements![ - token("("), - soft_block_indent(format_elements![content, trailing]), - token(")") - ]) - ] - } else { + let (leading, content, trailing) = formatted_node.split_trivia(); + let formatted = format_elements![ + leading, group_elements(format_elements![ token("("), - soft_block_indent(formatted_node), - token(")"), + soft_block_indent(format_elements![content, trailing]), + token(")") ]) - }; + ]; + (formatted, true) } else { (formatted_node, false) diff --git a/crates/rome_js_formatter/src/utils/mod.rs b/crates/rome_js_formatter/src/utils/mod.rs index 2a9f55b418b..90ff5fd0d71 100644 --- a/crates/rome_js_formatter/src/utils/mod.rs +++ b/crates/rome_js_formatter/src/utils/mod.rs @@ -388,8 +388,9 @@ pub(crate) fn format_template_chunk( // In template literals, the '\r' and '\r\n' line terminators are normalized to '\n' Ok(formatter.format_replaced( &chunk, - FormatElement::from(Token::new_dynamic( - normalize_newlines(chunk.text_trimmed(), ['\r']).into_owned(), + FormatElement::from(Token::from_syntax_token_cow_slice( + normalize_newlines(chunk.text_trimmed(), ['\r']), + &chunk, chunk.text_trimmed_range().start(), )), )) @@ -619,16 +620,14 @@ pub(crate) fn format_string_literal_token( if quoted.starts_with(secondary_quote_char) && !quoted.contains(primary_quote_char) { let s = "ed[1..quoted.len() - 1]; let s = format!("{}{}{}", primary_quote_char, s, primary_quote_char); - match normalize_newlines(&s, ['\r']) { - Cow::Borrowed(_) => s, - Cow::Owned(s) => s, - } + Cow::Owned(normalize_newlines(&s, ['\r']).into_owned()) } else { - normalize_newlines(quoted, 
['\r']).into_owned() + normalize_newlines(quoted, ['\r']) }; formatter.format_replaced( &token, - Token::new_dynamic(content, token.text_trimmed_range().start()).into(), + Token::from_syntax_token_cow_slice(content, &token, token.text_trimmed_range().start()) + .into(), ) } diff --git a/crates/rome_js_syntax/src/expr_ext.rs b/crates/rome_js_syntax/src/expr_ext.rs index 6e3f8c6ccad..18bfbf77d3c 100644 --- a/crates/rome_js_syntax/src/expr_ext.rs +++ b/crates/rome_js_syntax/src/expr_ext.rs @@ -7,7 +7,7 @@ use crate::{ }; use crate::{JsPreUpdateExpression, JsSyntaxKind::*}; use rome_rowan::{ - AstNode, AstSeparatedList, NodeOrToken, SyntaxResult, SyntaxText, TextRange, TextSize, + AstNode, AstSeparatedList, NodeOrToken, SyntaxNodeText, SyntaxResult, TextRange, TextSize, }; use std::cmp::Ordering; @@ -371,7 +371,7 @@ impl JsNumberLiteralExpression { impl JsStringLiteralExpression { /// Get the inner text of a string not including the quotes - pub fn inner_string_text(&self) -> SyntaxText { + pub fn inner_string_text(&self) -> SyntaxNodeText { let start = self.syntax().text_range().start() + TextSize::from(1); let end_char = self .syntax() diff --git a/crates/rome_js_syntax/src/lib.rs b/crates/rome_js_syntax/src/lib.rs index 331914fb840..82325d82f92 100644 --- a/crates/rome_js_syntax/src/lib.rs +++ b/crates/rome_js_syntax/src/lib.rs @@ -15,7 +15,7 @@ pub use self::generated::*; pub use expr_ext::*; pub use modifier_ext::*; pub use rome_rowan::{ - SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent, + SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent, }; pub use stmt_ext::*; pub use syntax_node::*; diff --git a/crates/rome_rowan/src/cursor/node.rs b/crates/rome_rowan/src/cursor/node.rs index 32fb06a2964..ef049848937 100644 --- a/crates/rome_rowan/src/cursor/node.rs +++ b/crates/rome_rowan/src/cursor/node.rs @@ -1,7 +1,7 @@ use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxToken, 
SyntaxTrivia}; use crate::green::{Child, Children, Slot}; use crate::{ - Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxText, TokenAtOffset, + Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxNodeText, TokenAtOffset, WalkEvent, }; use std::borrow::Cow; @@ -173,13 +173,13 @@ impl SyntaxNode { } #[inline] - pub fn text(&self) -> SyntaxText { - SyntaxText::new(self.clone()) + pub fn text(&self) -> SyntaxNodeText { + SyntaxNodeText::new(self.clone()) } #[inline] - pub fn text_trimmed(&self) -> SyntaxText { - SyntaxText::with_range(self.clone(), self.text_trimmed_range()) + pub fn text_trimmed(&self) -> SyntaxNodeText { + SyntaxNodeText::with_range(self.clone(), self.text_trimmed_range()) } #[inline] diff --git a/crates/rome_rowan/src/cursor/token.rs b/crates/rome_rowan/src/cursor/token.rs index f755012e03e..8f9270c92ff 100644 --- a/crates/rome_rowan/src/cursor/token.rs +++ b/crates/rome_rowan/src/cursor/token.rs @@ -1,5 +1,5 @@ use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxNode, SyntaxTrivia}; -use crate::{Direction, GreenTokenData, RawSyntaxKind}; +use crate::{Direction, GreenTokenData, RawSyntaxKind, SyntaxTokenText}; use std::hash::{Hash, Hasher}; use std::{fmt, iter, ptr}; use text_size::{TextRange, TextSize}; @@ -88,6 +88,11 @@ impl SyntaxToken { self.green().text() } + #[inline] + pub fn token_text(&self) -> SyntaxTokenText { + SyntaxTokenText::new(self.green().to_owned()) + } + #[inline] pub fn text_trimmed(&self) -> &str { self.green().text_trimmed() diff --git a/crates/rome_rowan/src/lib.rs b/crates/rome_rowan/src/lib.rs index ea40aa1fe39..18197001d92 100644 --- a/crates/rome_rowan/src/lib.rs +++ b/crates/rome_rowan/src/lib.rs @@ -14,7 +14,7 @@ pub mod cursor; mod green; pub mod syntax; -mod syntax_text; +mod syntax_node_text; mod utility_types; #[allow(unsafe_code)] @@ -27,6 +27,7 @@ mod serde_impls; #[allow(unsafe_code)] mod sll; mod syntax_factory; +mod syntax_token_text; mod 
tree_builder; pub use text_size::{TextLen, TextRange, TextSize}; @@ -40,7 +41,8 @@ pub use crate::{ TriviaPieceKind, }, syntax_factory::*, - syntax_text::SyntaxText, + syntax_node_text::SyntaxNodeText, + syntax_token_text::SyntaxTokenText, tree_builder::{Checkpoint, TreeBuilder}, utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent}, }; diff --git a/crates/rome_rowan/src/syntax/node.rs b/crates/rome_rowan/src/syntax/node.rs index 41d912d39af..2a9396439fb 100644 --- a/crates/rome_rowan/src/syntax/node.rs +++ b/crates/rome_rowan/src/syntax/node.rs @@ -1,7 +1,7 @@ use crate::syntax::element::SyntaxElement; use crate::syntax::SyntaxTrivia; use crate::{ - cursor, Direction, GreenNode, Language, NodeOrToken, SyntaxKind, SyntaxList, SyntaxText, + cursor, Direction, GreenNode, Language, NodeOrToken, SyntaxKind, SyntaxList, SyntaxNodeText, SyntaxToken, TokenAtOffset, WalkEvent, }; use std::fmt; @@ -56,7 +56,7 @@ impl SyntaxNode { /// }); /// assert_eq!("\n\t let \t\ta; \t\t", node.text()); /// ``` - pub fn text(&self) -> SyntaxText { + pub fn text(&self) -> SyntaxNodeText { self.raw.text() } @@ -84,7 +84,7 @@ impl SyntaxNode { /// }); /// assert_eq!("let \t\ta;", node.text_trimmed()); /// ``` - pub fn text_trimmed(&self) -> SyntaxText { + pub fn text_trimmed(&self) -> SyntaxNodeText { self.raw.text_trimmed() } diff --git a/crates/rome_rowan/src/syntax/token.rs b/crates/rome_rowan/src/syntax/token.rs index d9a3dfefbfc..5d0c5ce020a 100644 --- a/crates/rome_rowan/src/syntax/token.rs +++ b/crates/rome_rowan/src/syntax/token.rs @@ -1,4 +1,5 @@ use crate::syntax::SyntaxTrivia; +use crate::syntax_token_text::SyntaxTokenText; use crate::{cursor, Direction, Language, NodeOrToken, SyntaxElement, SyntaxKind, SyntaxNode}; use std::fmt; use std::marker::PhantomData; @@ -46,6 +47,10 @@ impl SyntaxToken { self.raw.text() } + pub fn token_text(&self) -> SyntaxTokenText { + self.raw.token_text() + } + /// Returns the text of the token, excluding all trivia. 
/// /// ``` diff --git a/crates/rome_rowan/src/syntax/trivia.rs b/crates/rome_rowan/src/syntax/trivia.rs index a3f1128af69..c2f305810ab 100644 --- a/crates/rome_rowan/src/syntax/trivia.rs +++ b/crates/rome_rowan/src/syntax/trivia.rs @@ -1,4 +1,4 @@ -use crate::{cursor, Language}; +use crate::{cursor, Language, SyntaxToken}; use std::fmt; use std::marker::PhantomData; use text_size::{TextRange, TextSize}; @@ -104,6 +104,16 @@ impl SyntaxTriviaPieceNewline { pub fn text_range(&self) -> TextRange { self.0.text_range() } + + /// Returns a reference to its [SyntaxTriviaPiece] + pub fn as_piece(&self) -> &SyntaxTriviaPiece { + &self.0 + } + + /// Returns its [SyntaxTriviaPiece] + pub fn into_piece(self) -> SyntaxTriviaPiece { + self.0 + } } impl SyntaxTriviaPieceWhitespace { @@ -118,6 +128,16 @@ impl SyntaxTriviaPieceWhitespace { pub fn text_range(&self) -> TextRange { self.0.text_range() } + + /// Returns a reference to its [SyntaxTriviaPiece] + pub fn as_piece(&self) -> &SyntaxTriviaPiece { + &self.0 + } + + /// Returns its [SyntaxTriviaPiece] + pub fn into_piece(self) -> SyntaxTriviaPiece { + self.0 + } } impl SyntaxTriviaPieceComments { @@ -136,6 +156,16 @@ impl SyntaxTriviaPieceComments { pub fn has_newline(&self) -> bool { self.0.trivia.kind.is_multiline_comment() } + + /// Returns a reference to its [SyntaxTriviaPiece] + pub fn as_piece(&self) -> &SyntaxTriviaPiece { + &self.0 + } + + /// Returns its [SyntaxTriviaPiece] + pub fn into_piece(self) -> SyntaxTriviaPiece { + self.0 + } } impl SyntaxTriviaPieceSkipped { @@ -150,6 +180,16 @@ impl SyntaxTriviaPieceSkipped { pub fn text_range(&self) -> TextRange { self.0.text_range() } + + /// Returns a reference to its [SyntaxTriviaPiece] + pub fn as_piece(&self) -> &SyntaxTriviaPiece { + &self.0 + } + + /// Returns its [SyntaxTriviaPiece] + pub fn into_piece(self) -> SyntaxTriviaPiece { + self.0 + } } /// [SyntaxTriviaPiece] gives access to the most granular information about the trivia @@ -418,6 +458,10 @@ impl 
SyntaxTriviaPiece { _ => None, } } + + pub fn token(&self) -> SyntaxToken { + SyntaxToken::from(self.raw.token().clone()) + } } impl fmt::Debug for SyntaxTriviaPiece { diff --git a/crates/rome_rowan/src/syntax_text.rs b/crates/rome_rowan/src/syntax_node_text.rs similarity index 91% rename from crates/rome_rowan/src/syntax_text.rs rename to crates/rome_rowan/src/syntax_node_text.rs index f1c76dfb5ab..ca5fd304cd8 100644 --- a/crates/rome_rowan/src/syntax_text.rs +++ b/crates/rome_rowan/src/syntax_node_text.rs @@ -6,19 +6,19 @@ use crate::{ }; #[derive(Clone)] -pub struct SyntaxText { +pub struct SyntaxNodeText { node: SyntaxNode, range: TextRange, } -impl SyntaxText { - pub(crate) fn new(node: SyntaxNode) -> SyntaxText { +impl SyntaxNodeText { + pub(crate) fn new(node: SyntaxNode) -> SyntaxNodeText { let range = node.text_range(); - SyntaxText { node, range } + SyntaxNodeText { node, range } } - pub(crate) fn with_range(node: SyntaxNode, range: TextRange) -> SyntaxText { - SyntaxText { node, range } + pub(crate) fn with_range(node: SyntaxNode, range: TextRange) -> SyntaxNodeText { + SyntaxNodeText { node, range } } pub fn len(&self) -> TextSize { @@ -61,7 +61,7 @@ impl SyntaxText { found(res) } - pub fn slice(&self, range: R) -> SyntaxText { + pub fn slice(&self, range: R) -> SyntaxNodeText { let start = range.start().unwrap_or_default(); let end = range.end().unwrap_or_else(|| self.len()); assert!(start <= end); @@ -81,7 +81,7 @@ impl SyntaxText { self.range, range, ); - SyntaxText { + SyntaxNodeText { node: self.node.clone(), range, } @@ -135,25 +135,25 @@ fn found(res: Result<(), T>) -> Option { } } -impl fmt::Debug for SyntaxText { +impl fmt::Debug for SyntaxNodeText { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(&self.to_string(), f) } } -impl fmt::Display for SyntaxText { +impl fmt::Display for SyntaxNodeText { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f)) } } -impl 
From for String { - fn from(text: SyntaxText) -> String { +impl From for String { + fn from(text: SyntaxNodeText) -> String { text.to_string() } } -impl PartialEq for SyntaxText { +impl PartialEq for SyntaxNodeText { fn eq(&self, mut rhs: &str) -> bool { self.try_for_each_chunk(|chunk| { if !rhs.starts_with(chunk) { @@ -167,26 +167,26 @@ impl PartialEq for SyntaxText { } } -impl PartialEq for str { - fn eq(&self, rhs: &SyntaxText) -> bool { +impl PartialEq for str { + fn eq(&self, rhs: &SyntaxNodeText) -> bool { rhs == self } } -impl PartialEq<&'_ str> for SyntaxText { +impl PartialEq<&'_ str> for SyntaxNodeText { fn eq(&self, rhs: &&str) -> bool { self == *rhs } } -impl PartialEq for &'_ str { - fn eq(&self, rhs: &SyntaxText) -> bool { +impl PartialEq for &'_ str { + fn eq(&self, rhs: &SyntaxNodeText) -> bool { rhs == self } } -impl PartialEq for SyntaxText { - fn eq(&self, other: &SyntaxText) -> bool { +impl PartialEq for SyntaxNodeText { + fn eq(&self, other: &SyntaxNodeText) -> bool { if self.range.len() != other.range.len() { return false; } @@ -219,7 +219,7 @@ fn zip_texts>(xs: &mut I, ys: &mut } } -impl Eq for SyntaxText {} +impl Eq for SyntaxNodeText {} mod private { use std::ops; diff --git a/crates/rome_rowan/src/syntax_token_text.rs b/crates/rome_rowan/src/syntax_token_text.rs new file mode 100644 index 00000000000..e0da47ed9d0 --- /dev/null +++ b/crates/rome_rowan/src/syntax_token_text.rs @@ -0,0 +1,79 @@ +use crate::GreenToken; +use std::fmt::Formatter; +use std::ops::Deref; +use text_size::{TextRange, TextSize}; + +/// Reference to the text of a SyntaxToken without having to worry about the lifetime of `&str`. +#[derive(Eq, Clone)] +pub struct SyntaxTokenText { + // Using a green token to ensure this type is Send + Sync. + token: GreenToken, + /// Relative range of the "selected" token text. 
+ range: TextRange, +} + +impl SyntaxTokenText { + pub(crate) fn new(token: GreenToken) -> SyntaxTokenText { + let range = TextRange::at(TextSize::default(), token.text_len()); + Self { token, range } + } + + /// Returns the length of the text + pub fn len(&self) -> TextSize { + self.range.len() + } + + /// Returns `true` if the text is empty + pub fn is_empty(&self) -> bool { + self.range.is_empty() + } + + /// Returns a subslice of the text. + pub fn slice(mut self, range: TextRange) -> SyntaxTokenText { + assert!( + self.range.contains_range(range), + "Range {range:?} exceeds bounds {:?}", + self.range + ); + + self.range = range; + self + } +} + +impl Deref for SyntaxTokenText { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.token.text()[self.range] + } +} + +impl std::fmt::Display for SyntaxTokenText { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", *self) + } +} + +impl std::fmt::Debug for SyntaxTokenText { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", *self) + } +} +impl PartialEq for SyntaxTokenText { + fn eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl PartialEq<&'_ str> for SyntaxTokenText { + fn eq(&self, rhs: &&'_ str) -> bool { + **self == **rhs + } +} + +impl PartialEq for &'_ str { + fn eq(&self, other: &SyntaxTokenText) -> bool { + **self == **other + } +} From dcb40ca683542f085e4f26125fbd9b692458855e Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 19 Apr 2022 12:25:10 +0200 Subject: [PATCH 2/3] Code review feedback --- crates/rome_formatter/src/format_element.rs | 20 ++++++++++---------- crates/rome_rowan/src/syntax/token.rs | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/crates/rome_formatter/src/format_element.rs b/crates/rome_formatter/src/format_element.rs index 670e122d9f0..7b0cce1a708 100644 --- a/crates/rome_formatter/src/format_element.rs +++ b/crates/rome_formatter/src/format_element.rs @@ -1128,7 +1128,8 
@@ pub enum Token { // The position of the dynamic token in the unformatted source code source_position: TextSize, }, - // A token that is taken 1:1 from the source code + /// A token for a text that is taken as is from the source code (input text and formatted representation are identical). + /// Implemented by taking a slice from a `SyntaxToken` to avoid allocating a new string. SyntaxTokenSlice { /// The start position of the token in the unformatted source code source_position: TextSize, @@ -1161,7 +1162,8 @@ impl Token { /// Create a token from a dynamic string and a range of the input source pub fn new_dynamic(text: String, position: TextSize) -> Self { - Self::assert_no_newlines(&text); + debug_assert_no_newlines(&text); + Self::Dynamic { text: text.into_boxed_str(), source_position: position, @@ -1180,8 +1182,6 @@ impl Token { token: &SyntaxToken, start: TextSize, ) -> Self { - Self::assert_no_newlines(&text); - match text { Cow::Owned(text) => Self::new_dynamic(text, start), Cow::Borrowed(text) => { @@ -1189,7 +1189,7 @@ impl Token { debug_assert_eq!( text, &token.text()[range - token.text_range().start()], - "The borrowed string doesn't match the specified token substring" + "The borrowed string doesn't match the specified token substring. Does the borrowed string belong to this token and range?" ); Token::new_syntax_token_slice(token, range) } @@ -1201,7 +1201,7 @@ impl Token { let relative_range = range - token.text_range().start(); let slice = token.token_text().slice(relative_range); - Self::assert_no_newlines(&slice); + debug_assert_no_newlines(&slice); Self::SyntaxTokenSlice { slice, @@ -1209,10 +1209,6 @@ impl Token { } } - fn assert_no_newlines(text: &str) { - debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. 
Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); - } - /// Get the range of the input source covered by this token, /// or None if the token was synthesized by the formatter pub fn source_position(&self) -> Option<&TextSize> { @@ -1228,6 +1224,10 @@ impl Token { } } +fn debug_assert_no_newlines(text: &str) { + debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); +} + // Token equality only compares the text content impl PartialEq for Token { fn eq(&self, other: &Self) -> bool { diff --git a/crates/rome_rowan/src/syntax/token.rs b/crates/rome_rowan/src/syntax/token.rs index 5d0c5ce020a..eba0d838752 100644 --- a/crates/rome_rowan/src/syntax/token.rs +++ b/crates/rome_rowan/src/syntax/token.rs @@ -47,6 +47,7 @@ impl SyntaxToken { self.raw.text() } + /// Returns the text of a token, including all trivia as an owned value. pub fn token_text(&self) -> SyntaxTokenText { self.raw.token_text() } From a93522ba05bf2c70fa14ade0c4133032f33d600d Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 20 Apr 2022 16:03:46 +0200 Subject: [PATCH 3/3] Rebase --- crates/rome_json_syntax/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/rome_json_syntax/src/lib.rs b/crates/rome_json_syntax/src/lib.rs index 78c49ff08de..a9d8dab9bf3 100644 --- a/crates/rome_json_syntax/src/lib.rs +++ b/crates/rome_json_syntax/src/lib.rs @@ -3,9 +3,7 @@ mod generated; mod syntax_node; pub use self::generated::*; -pub use rome_rowan::{ - SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent, -}; +pub use rome_rowan::{TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent}; pub use syntax_node::*; use rome_rowan::RawSyntaxKind;