From e357be1e963f51ccbc89c70cfb8bf599e48e924e Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 22 Sep 2023 18:30:40 +0530 Subject: [PATCH 1/3] Ignore quote escapes in expression part of f-string --- .../src/expression/string.rs | 48 ++++++++++++++----- scripts/formatter_ecosystem_checks.sh | 2 +- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index a76230fa2dede..9a424b1a9bd58 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -138,16 +138,8 @@ impl<'a> FormatString<'a> { impl<'a> Format> for FormatString<'a> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. - // This is to mark all of those comments as formatted but we need to - // figure out how to handle them. - if matches!(self.string, AnyString::FString(_)) { - f.context() - .comments() - .mark_verbatim_node_comments_formatted(self.string.into()); - } let locator = f.context().locator(); - match self.layout { + let result = match self.layout { StringLayout::Default => { if self.string.is_implicit_concatenated() { in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) @@ -170,7 +162,18 @@ impl<'a> Format> for FormatString<'a> { StringLayout::ImplicitConcatenatedStringInBinaryLike => { FormatStringContinuation::new(self.string).fmt(f) } + }; + // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. + // This is to mark all of those comments as formatted but we need to + // figure out how to handle them. Note that this needs to be done only + // after the f-string is formatted, so only for all the non-formatted + // comments. + if matches!(self.string, AnyString::FString(_)) { + f.context() + .comments() + .mark_verbatim_node_comments_formatted(self.string.into()); } + result } } @@ -430,7 +433,7 @@ impl StringPart { let normalized = normalize_string( locator.slice(self.content_range), preferred_quotes, - self.prefix.is_raw_string(), + self.prefix, ); NormalizedString { @@ -523,6 +526,10 @@ impl StringPrefix { pub(super) const fn is_raw_string(self) -> bool { self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER) } + + pub(super) const fn is_fstring(self) -> bool { + self.contains(StringPrefix::F_STRING) + } } impl Format> for StringPrefix { @@ -760,7 +767,7 @@ impl Format> for StringQuotes { /// with the provided `style`. /// /// Returns the normalized string and whether it contains new lines. -fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow { +fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow { // The normalized string if `input` is not yet normalized. // `output` must remain empty if `input` is already normalized. let mut output = String::new(); @@ -774,7 +781,22 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow let mut chars = input.char_indices(); + let is_raw = prefix.is_raw_string(); + let is_fstring = prefix.is_fstring(); + let mut in_formatted_value = false; + while let Some((index, c)) = chars.next() { + if is_fstring && matches!(c, '{' | '}') { + if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { + if next == c { + // Skip over the second character of the double braces + chars.next(); + } else { + in_formatted_value = c == '{'; + } + } + continue; + } if c == '\r' { output.push_str(&input[last_index..index]); @@ -792,7 +814,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow if c == '\\' { if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { #[allow(clippy::if_same_then_else)] - if next == opposite_quote { + if next == opposite_quote && !in_formatted_value { // Remove the escape by ending before the backslash and starting again with the quote chars.next(); output.push_str(&input[last_index..index]); @@ -805,7 +827,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow chars.next(); } } - } else if c == preferred_quote { + } else if c == preferred_quote && !in_formatted_value { // Escape the quote output.push_str(&input[last_index..index]); output.push('\\'); diff --git a/scripts/formatter_ecosystem_checks.sh b/scripts/formatter_ecosystem_checks.sh index 46966c91cc224..5c099cf7046e5 100755 --- a/scripts/formatter_ecosystem_checks.sh +++ b/scripts/formatter_ecosystem_checks.sh @@ -64,7 +64,7 @@ git -C "$dir/cpython" checkout 1a1bfc28912a39b500c578e9f10a8a222638d411 time cargo run --bin ruff_dev -- format-dev --stability-check \ --error-file "$target/progress_projects_errors.txt" --log-file "$target/progress_projects_log.txt" --stats-file "$target/progress_projects_stats.txt" \ - --files-with-errors 16 --multi-project "$dir" || ( + --files-with-errors 15 --multi-project "$dir" || ( echo "Ecosystem check failed" cat "$target/progress_projects_log.txt" exit 1 From b89947cb2a341ed3db71f6a906c03077972bb1f8 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 22 Sep 2023 22:47:08 +0530 Subject: [PATCH 2/3] Mark comments as formatted for each fstring value --- crates/ruff_python_formatter/src/expression/string.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 9a424b1a9bd58..682ba06943184 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -168,10 +168,11 @@ impl<'a> Format> for FormatString<'a> { // figure out how to handle them. Note that this needs to be done only // after the f-string is formatted, so only for all the non-formatted // comments. - if matches!(self.string, AnyString::FString(_)) { - f.context() - .comments() - .mark_verbatim_node_comments_formatted(self.string.into()); + if let AnyString::FString(fstring) = self.string { + let comments = f.context().comments(); + fstring.values.iter().for_each(|value| { + comments.mark_verbatim_node_comments_formatted(value.into()); + }); } result } From 6e02c451c0ef2d7f902927b4e11aa84a76464700 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 22 Sep 2023 22:47:35 +0530 Subject: [PATCH 3/3] Use formatted value counter and peekable char indices --- .../src/expression/string.rs | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 682ba06943184..8756aeee24bef 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -780,21 +780,22 @@ fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> let preferred_quote = style.as_char(); let opposite_quote = style.invert().as_char(); - let mut chars = input.char_indices(); + let mut chars = input.char_indices().peekable(); let is_raw = prefix.is_raw_string(); let is_fstring = prefix.is_fstring(); - let mut in_formatted_value = false; + let mut formatted_value_nesting = 0u32; while let Some((index, c)) = chars.next() { if is_fstring && matches!(c, '{' | '}') { - if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { - if next == c { - // Skip over the second character of the double braces - chars.next(); - } else { - in_formatted_value = c == '{'; - } + if chars.peek().copied().is_some_and(|(_, next)| next == c) { + // Skip over the second character of the double braces + chars.next(); + } else if c == '{' { + formatted_value_nesting += 1; + } else { + // Safe to assume that `c == '}'` here because of the matched pattern above + formatted_value_nesting = formatted_value_nesting.saturating_sub(1); } continue; } @@ -802,7 +803,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> output.push_str(&input[last_index..index]); // Skip over the '\r' character, keep the `\n` - if input.as_bytes().get(index + 1).copied() == Some(b'\n') { + if chars.peek().copied().is_some_and(|(_, next)| next == '\n') { chars.next(); } // Replace the `\r` with a `\n` @@ -813,9 +814,9 @@ fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> last_index = index + '\r'.len_utf8(); } else if !quotes.triple && !is_raw { if c == '\\' { - if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { + if let Some((_, next)) = chars.peek().copied() { #[allow(clippy::if_same_then_else)] - if next == opposite_quote && !in_formatted_value { + if next == opposite_quote && formatted_value_nesting == 0 { // Remove the escape by ending before the backslash and starting again with the quote chars.next(); output.push_str(&input[last_index..index]); @@ -828,7 +829,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> chars.next(); } } - } else if c == preferred_quote && !in_formatted_value { + } else if c == preferred_quote && formatted_value_nesting == 0 { // Escape the quote output.push_str(&input[last_index..index]); output.push('\\');