Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

perf(rome_js_formatter): Reduce the String allocations for Tokens #2462

Merged
merged 3 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/rome_css_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod syntax_node;

pub use self::generated::*;
pub use rome_rowan::{
SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
};
pub use syntax_node::*;

Expand Down
90 changes: 78 additions & 12 deletions crates/rome_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::intersperse::{Intersperse, IntersperseFn};
use crate::{format_elements, TextSize};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
use crate::{format_elements, TextRange, TextSize};
use rome_rowan::{
Language, SyntaxNode, SyntaxToken, SyntaxTokenText, SyntaxTriviaPieceComments, TextLen,
};
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
Expand Down Expand Up @@ -1058,7 +1060,7 @@ impl List {
}

impl Deref for List {
type Target = Vec<FormatElement>;
type Target = [FormatElement];

fn deref(&self) -> &Self::Target {
&self.content
Expand Down Expand Up @@ -1126,6 +1128,14 @@ pub enum Token {
// The position of the dynamic token in the unformatted source code
source_position: TextSize,
},
/// A token for text that is taken as-is from the source code (input text and formatted representation are identical).
/// Implemented by taking a slice from a `SyntaxToken` to avoid allocating a new string.
SyntaxTokenSlice {
/// The start position of the token in the unformatted source code
source_position: TextSize,
/// The token text
slice: SyntaxTokenText,
},
}

impl Debug for Token {
Expand All @@ -1135,6 +1145,11 @@ impl Debug for Token {
match self {
Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
Token::SyntaxTokenSlice {
slice: token_text, ..
} => {
write!(fmt, "SyntaxTokenSlice({:?})", token_text)
}
}
}
}
Expand All @@ -1147,25 +1162,72 @@ impl Token {

/// Create a token from a dynamic string and its position in the input source
pub fn new_dynamic(text: String, position: TextSize) -> Self {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
debug_assert_no_newlines(&text);

Self::Dynamic {
text: text.into_boxed_str(),
source_position: position,
}
}

/// Creates a token from a [Cow] that is a sub-slice over the text of a token.
///
/// The `start` is the absolute start of the token in the source text.
///
/// ## Returns
/// * [Token::Dynamic] if `text` is a [Cow::Owned] (text doesn't match syntax token text)
/// * [Token::SyntaxTokenSlice] if `text` is borrowed. Avoids allocating a new string.
ematipico marked this conversation as resolved.
Show resolved Hide resolved
pub fn from_syntax_token_cow_slice<L: Language>(
    text: Cow<str>,
    token: &SyntaxToken<L>,
    start: TextSize,
) -> Self {
    // Owned text cannot be backed by the token, so it becomes a dynamic token.
    let borrowed = match text {
        Cow::Owned(owned) => return Self::new_dynamic(owned, start),
        Cow::Borrowed(borrowed) => borrowed,
    };

    // Absolute range of the borrowed text: it begins at `start` and spans the text's length.
    let range = TextRange::at(start, borrowed.text_len());

    // Debug-only check that the borrowed text equals the token text at that range
    // (the range is made relative to the token start before indexing).
    debug_assert_eq!(
        borrowed,
        &token.text()[range - token.text_range().start()],
        "The borrowed string doesn't match the specified token substring. Does the borrowed string belong to this token and range?"
    );

    Self::new_syntax_token_slice(token, range)
}

/// Builds a [Token] whose text is a slice of the text of the given [SyntaxToken].
///
/// `range` is expressed in absolute source coordinates: it is shifted by the start of
/// `token` before slicing, and its start is stored as the token's source position.
pub fn new_syntax_token_slice<L: Language>(token: &SyntaxToken<L>, range: TextRange) -> Self {
    // Translate the absolute range into one relative to the token's own text.
    let token_start = token.text_range().start();
    let relative = range - token_start;
    let slice = token.token_text().slice(relative);

    debug_assert_no_newlines(&slice);

    Self::SyntaxTokenSlice {
        source_position: range.start(),
        slice,
    }
}

/// Get the position in the unformatted source code associated with this token,
/// or None if the token was synthesized by the formatter
pub fn source(&self) -> Option<&TextSize> {
pub fn source_position(&self) -> Option<&TextSize> {
match self {
Token::Static { .. } => None,
Token::Dynamic {
source_position, ..
} => Some(source_position),
Token::SyntaxTokenSlice {
source_position, ..
} => Some(source_position),
}
}
}

/// Debug-build check that `text` contains no carriage return (`'\r'`).
///
/// Despite the name, line feeds (`'\n'`) are accepted; only `'\r'` triggers the assertion.
fn debug_assert_no_newlines(text: &str) {
    debug_assert!(
        text.find('\r').is_none(),
        "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.",
        text
    );
}

// Token equality only compares the text content
impl PartialEq for Token {
fn eq(&self, other: &Self) -> bool {
Expand All @@ -1181,10 +1243,9 @@ impl<L: Language> From<SyntaxToken<L>> for Token {

impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
fn from(token: &'a SyntaxToken<L>) -> Self {
Self::new_dynamic(
token.text_trimmed().into(),
token.text_trimmed_range().start(),
)
let trimmed_range = token.text_trimmed_range();

Self::new_syntax_token_slice(token, trimmed_range)
}
}

Expand Down Expand Up @@ -1222,9 +1283,11 @@ pub fn normalize_newlines<const N: usize>(text: &str, terminators: [char; N]) ->

impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
Self::new_dynamic(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
trivia.text_range().start(),
let range = trivia.text_range();
Token::from_syntax_token_cow_slice(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS),
&trivia.as_piece().token(),
range.start(),
)
}
}
Expand All @@ -1235,6 +1298,9 @@ impl Deref for Token {
match self {
Token::Static { text } => text,
Token::Dynamic { text, .. } => text,
Token::SyntaxTokenSlice {
slice: token_text, ..
} => token_text.deref(),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_formatter/src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl<'a> Printer<'a> {
self.state.pending_space = false;
}

if let Some(source) = token.source() {
if let Some(source) = token.source_position() {
self.state.source_markers.push(SourceMarker {
source: *source,
dest: TextSize::from(self.state.buffer.len() as u32),
Expand Down
14 changes: 6 additions & 8 deletions crates/rome_js_formatter/src/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,9 @@ impl Formatter {
let skipped_trivia_range = skipped_trivia_range.expect("Only call this method for leading trivia containing at least one skipped token trivia.");

// Format the skipped token trivia range
// Compute the offsets relative to the tokens text
let relative_skipped_range = skipped_trivia_range - token.text_range().start();
let text = &token.text()[relative_skipped_range];
elements.push(FormatElement::from(Token::new_dynamic(
text.to_string(),
skipped_trivia_range.start(),
elements.push(FormatElement::from(Token::new_syntax_token_slice(
token,
skipped_trivia_range,
)));

// `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except
Expand Down Expand Up @@ -647,8 +644,9 @@ impl Formatter {
}

fn trivia_token<L: Language>(piece: SyntaxTriviaPiece<L>) -> Token {
Token::new_dynamic(
normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(),
Token::from_syntax_token_cow_slice(
normalize_newlines(piece.text(), LINE_TERMINATORS),
&piece.token(),
piece.text_range().start(),
)
}
Expand Down
21 changes: 7 additions & 14 deletions crates/rome_js_formatter/src/utils/binary_like_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,23 +210,16 @@ fn format_with_or_without_parenthesis(
};

let result = if operation_is_higher {
let formatted = if node.has_comments_direct() {
let (leading, content, trailing) = formatted_node.split_trivia();
format_elements![
leading,
group_elements(format_elements![
token("("),
soft_block_indent(format_elements![content, trailing]),
token(")")
])
]
} else {
let (leading, content, trailing) = formatted_node.split_trivia();
ematipico marked this conversation as resolved.
Show resolved Hide resolved
let formatted = format_elements![
leading,
group_elements(format_elements![
token("("),
soft_block_indent(formatted_node),
token(")"),
soft_block_indent(format_elements![content, trailing]),
token(")")
])
};
];

(formatted, true)
} else {
(formatted_node, false)
Expand Down
15 changes: 7 additions & 8 deletions crates/rome_js_formatter/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,9 @@ pub(crate) fn format_template_chunk(
// In template literals, the '\r' and '\r\n' line terminators are normalized to '\n'
Ok(formatter.format_replaced(
&chunk,
FormatElement::from(Token::new_dynamic(
normalize_newlines(chunk.text_trimmed(), ['\r']).into_owned(),
FormatElement::from(Token::from_syntax_token_cow_slice(
normalize_newlines(chunk.text_trimmed(), ['\r']),
&chunk,
chunk.text_trimmed_range().start(),
)),
))
Expand Down Expand Up @@ -619,16 +620,14 @@ pub(crate) fn format_string_literal_token(
if quoted.starts_with(secondary_quote_char) && !quoted.contains(primary_quote_char) {
let s = &quoted[1..quoted.len() - 1];
let s = format!("{}{}{}", primary_quote_char, s, primary_quote_char);
match normalize_newlines(&s, ['\r']) {
Cow::Borrowed(_) => s,
Cow::Owned(s) => s,
}
Cow::Owned(normalize_newlines(&s, ['\r']).into_owned())
ematipico marked this conversation as resolved.
Show resolved Hide resolved
} else {
normalize_newlines(quoted, ['\r']).into_owned()
normalize_newlines(quoted, ['\r'])
};

formatter.format_replaced(
&token,
Token::new_dynamic(content, token.text_trimmed_range().start()).into(),
Token::from_syntax_token_cow_slice(content, &token, token.text_trimmed_range().start())
.into(),
)
}
4 changes: 2 additions & 2 deletions crates/rome_js_syntax/src/expr_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
};
use crate::{JsPreUpdateExpression, JsSyntaxKind::*};
use rome_rowan::{
AstNode, AstSeparatedList, NodeOrToken, SyntaxResult, SyntaxText, TextRange, TextSize,
AstNode, AstSeparatedList, NodeOrToken, SyntaxNodeText, SyntaxResult, TextRange, TextSize,
};
use std::cmp::Ordering;

Expand Down Expand Up @@ -371,7 +371,7 @@ impl JsNumberLiteralExpression {

impl JsStringLiteralExpression {
/// Get the inner text of a string not including the quotes
pub fn inner_string_text(&self) -> SyntaxText {
pub fn inner_string_text(&self) -> SyntaxNodeText {
let start = self.syntax().text_range().start() + TextSize::from(1);
let end_char = self
.syntax()
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub use self::generated::*;
pub use expr_ext::*;
pub use modifier_ext::*;
pub use rome_rowan::{
SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
};
pub use stmt_ext::*;
pub use syntax_node::*;
Expand Down
4 changes: 1 addition & 3 deletions crates/rome_json_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ mod generated;
mod syntax_node;

pub use self::generated::*;
pub use rome_rowan::{
SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
};
pub use rome_rowan::{TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent};
pub use syntax_node::*;

use rome_rowan::RawSyntaxKind;
Expand Down
10 changes: 5 additions & 5 deletions crates/rome_rowan/src/cursor/node.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxToken, SyntaxTrivia};
use crate::green::{Child, Children, Slot};
use crate::{
Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxText, TokenAtOffset,
Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxNodeText, TokenAtOffset,
WalkEvent,
};
use std::borrow::Cow;
Expand Down Expand Up @@ -173,13 +173,13 @@ impl SyntaxNode {
}

#[inline]
pub fn text(&self) -> SyntaxText {
SyntaxText::new(self.clone())
pub fn text(&self) -> SyntaxNodeText {
SyntaxNodeText::new(self.clone())
}

#[inline]
pub fn text_trimmed(&self) -> SyntaxText {
SyntaxText::with_range(self.clone(), self.text_trimmed_range())
pub fn text_trimmed(&self) -> SyntaxNodeText {
SyntaxNodeText::with_range(self.clone(), self.text_trimmed_range())
}

#[inline]
Expand Down
7 changes: 6 additions & 1 deletion crates/rome_rowan/src/cursor/token.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxNode, SyntaxTrivia};
use crate::{Direction, GreenTokenData, RawSyntaxKind};
use crate::{Direction, GreenTokenData, RawSyntaxKind, SyntaxTokenText};
use std::hash::{Hash, Hasher};
use std::{fmt, iter, ptr};
use text_size::{TextRange, TextSize};
Expand Down Expand Up @@ -88,6 +88,11 @@ impl SyntaxToken {
self.green().text()
}

/// Returns the text of this token as a [SyntaxTokenText], built from an owned copy of the token's green data.
#[inline]
pub fn token_text(&self) -> SyntaxTokenText {
SyntaxTokenText::new(self.green().to_owned())
ematipico marked this conversation as resolved.
Show resolved Hide resolved
}

#[inline]
pub fn text_trimmed(&self) -> &str {
self.green().text_trimmed()
Expand Down
6 changes: 4 additions & 2 deletions crates/rome_rowan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub mod cursor;
mod green;

pub mod syntax;
mod syntax_text;
mod syntax_node_text;
mod utility_types;

#[allow(unsafe_code)]
Expand All @@ -27,6 +27,7 @@ mod serde_impls;
#[allow(unsafe_code)]
mod sll;
mod syntax_factory;
mod syntax_token_text;
mod tree_builder;

pub use text_size::{TextLen, TextRange, TextSize};
Expand All @@ -40,7 +41,8 @@ pub use crate::{
TriviaPieceKind,
},
syntax_factory::*,
syntax_text::SyntaxText,
syntax_node_text::SyntaxNodeText,
syntax_token_text::SyntaxTokenText,
tree_builder::{Checkpoint, TreeBuilder},
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
};
Expand Down
Loading