Skip to content

Commit fe79798

Browse files
authored
split string module (#9987)
1 parent bb8d203 commit fe79798

File tree

4 files changed

+847
-813
lines changed

4 files changed

+847
-813
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
use std::iter::FusedIterator;
2+
3+
use memchr::memchr2;
4+
5+
use ruff_python_ast::{
6+
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
7+
StringLiteral,
8+
};
9+
use ruff_source_file::Locator;
10+
use ruff_text_size::{Ranged, TextLen, TextRange};
11+
12+
use crate::expression::expr_f_string::f_string_quoting;
13+
use crate::other::f_string::FormatFString;
14+
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
15+
use crate::prelude::*;
16+
use crate::string::{Quoting, StringPrefix, StringQuotes};
17+
18+
/// Represents any kind of string expression. This could be either a string,
19+
/// bytes or f-string.
20+
#[derive(Copy, Clone, Debug)]
21+
pub(crate) enum AnyString<'a> {
22+
String(&'a ExprStringLiteral),
23+
Bytes(&'a ExprBytesLiteral),
24+
FString(&'a ExprFString),
25+
}
26+
27+
impl<'a> AnyString<'a> {
28+
/// Creates a new [`AnyString`] from the given [`Expr`].
29+
///
30+
/// Returns `None` if the expression is not either a string, bytes or f-string.
31+
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
32+
match expression {
33+
Expr::StringLiteral(string) => Some(AnyString::String(string)),
34+
Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
35+
Expr::FString(fstring) => Some(AnyString::FString(fstring)),
36+
_ => None,
37+
}
38+
}
39+
40+
/// Returns `true` if the string is implicitly concatenated.
41+
pub(crate) fn is_implicit_concatenated(self) -> bool {
42+
match self {
43+
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
44+
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
45+
Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
46+
}
47+
}
48+
49+
/// Returns the quoting to be used for this string.
50+
pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
51+
match self {
52+
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
53+
Self::FString(f_string) => f_string_quoting(f_string, locator),
54+
}
55+
}
56+
57+
/// Returns a vector of all the [`AnyStringPart`] of this string.
58+
pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
59+
match self {
60+
Self::String(ExprStringLiteral { value, .. }) => {
61+
AnyStringPartsIter::String(value.iter())
62+
}
63+
Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
64+
Self::FString(ExprFString { value, .. }) => {
65+
AnyStringPartsIter::FString(value.iter(), quoting)
66+
}
67+
}
68+
}
69+
70+
pub(crate) fn is_multiline(self, source: &str) -> bool {
71+
match self {
72+
AnyString::String(_) | AnyString::Bytes(_) => {
73+
let contents = &source[self.range()];
74+
let prefix = StringPrefix::parse(contents);
75+
let quotes = StringQuotes::parse(
76+
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
77+
);
78+
79+
quotes.is_some_and(StringQuotes::is_triple)
80+
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
81+
}
82+
AnyString::FString(fstring) => {
83+
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
84+
}
85+
}
86+
}
87+
}
88+
89+
impl Ranged for AnyString<'_> {
90+
fn range(&self) -> TextRange {
91+
match self {
92+
Self::String(expr) => expr.range(),
93+
Self::Bytes(expr) => expr.range(),
94+
Self::FString(expr) => expr.range(),
95+
}
96+
}
97+
}
98+
99+
impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
100+
fn from(value: &AnyString<'a>) -> Self {
101+
match value {
102+
AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr),
103+
AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr),
104+
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
105+
}
106+
}
107+
}
108+
109+
impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
    /// Converts an owned [`AnyString`] by deferring to the by-reference conversion.
    fn from(value: AnyString<'a>) -> Self {
        Self::from(&value)
    }
}
114+
115+
impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
116+
fn from(value: &AnyString<'a>) -> Self {
117+
match value {
118+
AnyString::String(expr) => ExpressionRef::StringLiteral(expr),
119+
AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr),
120+
AnyString::FString(expr) => ExpressionRef::FString(expr),
121+
}
122+
}
123+
}
124+
125+
pub(super) enum AnyStringPartsIter<'a> {
126+
String(std::slice::Iter<'a, StringLiteral>),
127+
Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
128+
FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
129+
}
130+
131+
impl<'a> Iterator for AnyStringPartsIter<'a> {
132+
type Item = AnyStringPart<'a>;
133+
134+
fn next(&mut self) -> Option<Self::Item> {
135+
let part = match self {
136+
Self::String(inner) => {
137+
let part = inner.next()?;
138+
AnyStringPart::String {
139+
part,
140+
layout: StringLiteralKind::String,
141+
}
142+
}
143+
Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?),
144+
Self::FString(inner, quoting) => {
145+
let part = inner.next()?;
146+
match part {
147+
ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
148+
part: string_literal,
149+
layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting),
150+
},
151+
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
152+
part: f_string,
153+
quoting: *quoting,
154+
},
155+
}
156+
}
157+
};
158+
159+
Some(part)
160+
}
161+
}
162+
163+
impl FusedIterator for AnyStringPartsIter<'_> {}
164+
165+
/// Represents any kind of string which is part of an implicitly concatenated
166+
/// string. This could be either a string, bytes or f-string.
167+
///
168+
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
169+
#[derive(Clone, Debug)]
170+
pub(super) enum AnyStringPart<'a> {
171+
String {
172+
part: &'a ast::StringLiteral,
173+
layout: StringLiteralKind,
174+
},
175+
Bytes(&'a ast::BytesLiteral),
176+
FString {
177+
part: &'a ast::FString,
178+
quoting: Quoting,
179+
},
180+
}
181+
182+
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
183+
fn from(value: &AnyStringPart<'a>) -> Self {
184+
match value {
185+
AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
186+
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
187+
AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
188+
}
189+
}
190+
}
191+
192+
impl Ranged for AnyStringPart<'_> {
193+
fn range(&self) -> TextRange {
194+
match self {
195+
Self::String { part, .. } => part.range(),
196+
Self::Bytes(part) => part.range(),
197+
Self::FString { part, .. } => part.range(),
198+
}
199+
}
200+
}
201+
202+
impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
203+
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
204+
match self {
205+
AnyStringPart::String { part, layout } => {
206+
FormatStringLiteral::new(part, *layout).fmt(f)
207+
}
208+
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
209+
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
210+
}
211+
}
212+
}

crates/ruff_python_formatter/src/string/docstring.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
109109
/// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
110110
/// that use spaces for alignment.
111111
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
112-
let docstring = &normalized.text;
112+
let docstring = &normalized.text();
113113

114114
// Black doesn't change the indentation of docstrings that contain an escaped newline
115115
if contains_unescaped_newline(docstring) {
@@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
125125
let mut lines = docstring.split('\n').peekable();
126126

127127
// Start the string
128-
write!(f, [normalized.prefix, normalized.quotes])?;
128+
write!(f, [normalized.prefix(), normalized.quotes()])?;
129129
// We track where in the source docstring we are (in source code byte offsets)
130130
let mut offset = normalized.start();
131131

@@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
141141

142142
// Edge case: The first line is `""" "content`, so we need to insert a chaperone space that keeps
143143
// inner quotes and closing quotes from getting too close to avoid `""""content`
144-
if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
144+
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
145145
space().fmt(f)?;
146146
}
147147

@@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
168168
{
169169
space().fmt(f)?;
170170
}
171-
normalized.quotes.fmt(f)?;
171+
normalized.quotes().fmt(f)?;
172172
return Ok(());
173173
}
174174

@@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
194194
offset,
195195
stripped_indentation,
196196
already_normalized,
197-
quote_char: normalized.quotes.quote_char,
197+
quote_char: normalized.quotes().quote_char,
198198
code_example: CodeExample::default(),
199199
}
200200
.add_iter(lines)?;
@@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
207207
space().fmt(f)?;
208208
}
209209

210-
write!(f, [normalized.quotes])
210+
write!(f, [normalized.quotes()])
211211
}
212212

213213
fn contains_unescaped_newline(haystack: &str) -> bool {
@@ -1569,7 +1569,7 @@ fn docstring_format_source(
15691569
/// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
15701570
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
15711571
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
1572-
trim_end.ends_with(normalized.quotes.quote_char.as_char())
1572+
trim_end.ends_with(normalized.quotes().quote_char.as_char())
15731573
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
15741574
}
15751575

0 commit comments

Comments
 (0)