Skip to content

Commit 100904a

Browse files
authored
Avoid parsing other parts of a format specification if replacements are present (#6858)
Closes #6767. Replaces #6773 (this cherry-picks some parts from there). This is an alternative to the approach introduced in #6616, which added support for placeholders in format specifications while retaining parsing of the other format specification parts. The idea is that if there are placeholders in a format specification, we will not attempt to glean semantic meaning from the other parts of the format specification; we'll just extract all of the placeholders, ignoring other characters. The dynamic content of placeholders can drastically change the meaning of the format specification in ways unknowable by static analysis. This change prevents false analysis and will ensure safety if we build other rules on top of this, at the cost of missing detection of some bad specifications. Minor note: I've used "replacements" and "placeholders" interchangeably, but am trying to go with "placeholder" as I think it's a better term for the static analysis concept here.
1 parent 0bac7bd commit 100904a

File tree

4 files changed

+87
-99
lines changed

4 files changed

+87
-99
lines changed

crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
"{:s} {:y}".format("hello", "world") # [bad-format-character]
1616

1717
"{:*^30s}".format("centered") # OK
18-
"{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)
19-
20-
"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
18+
"{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked)
19+
"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked)
20+
"{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder)
21+
"{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders)
22+
"{0:.{foo}x{bar}y{foobar}g}".format(...) # OK (all nested placeholders are consumed without considering in between chars)
2123

2224
## f-strings
2325

crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,14 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
6363
));
6464
}
6565
Err(_) => {}
66-
Ok(format_spec) => {
67-
for replacement in format_spec.replacements() {
68-
let FormatPart::Field { format_spec, .. } = replacement else {
66+
Ok(FormatSpec::Static(_)) => {}
67+
Ok(FormatSpec::Dynamic(format_spec)) => {
68+
for placeholder in format_spec.placeholders {
69+
let FormatPart::Field { format_spec, .. } = placeholder else {
6970
continue;
7071
};
7172
if let Err(FormatSpecError::InvalidFormatType) =
72-
FormatSpec::parse(format_spec)
73+
FormatSpec::parse(&format_spec)
7374
{
7475
checker.diagnostics.push(Diagnostic::new(
7576
BadStringFormatCharacter {

crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap

+6-6
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ bad_string_format_character.py:15:1: PLE1300 Unsupported format character 'y'
5151
17 | "{:*^30s}".format("centered") # OK
5252
|
5353

54-
bad_string_format_character.py:20:1: PLE1300 Unsupported format character 'y'
54+
bad_string_format_character.py:19:1: PLE1300 Unsupported format character 'y'
5555
|
56-
18 | "{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)
57-
19 |
58-
20 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
56+
17 | "{:*^30s}".format("centered") # OK
57+
18 | "{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked)
58+
19 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked)
5959
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
60-
21 |
61-
22 | ## f-strings
60+
20 | "{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder)
61+
21 | "{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders)
6262
|
6363

6464

crates/ruff_python_literal/src/format.rs

+71-86
Original file line numberDiff line numberDiff line change
@@ -190,30 +190,35 @@ impl FormatParse for FormatType {
190190
/// "hello {name:<20}".format(name="test")
191191
/// ```
192192
///
193-
/// Format specifications allow nested replacements for dynamic formatting.
193+
/// Format specifications allow nested placeholders for dynamic formatting.
194194
/// For example, the following statements are equivalent:
195195
/// ```python
196196
/// "hello {name:{fmt}}".format(name="test", fmt="<20")
197197
/// "hello {name:{align}{width}}".format(name="test", align="<", width="20")
198198
/// "hello {name:<20{empty}>}".format(name="test", empty="")
199199
/// ```
200200
///
201-
/// Nested replacements can include additional format specifiers.
201+
/// Nested placeholders can include additional format specifiers.
202202
/// ```python
203203
/// "hello {name:{fmt:*>}}".format(name="test", fmt="<20")
204204
/// ```
205205
///
206-
/// However, replacements can only be singly nested (preserving our sanity).
206+
/// However, placeholders can only be singly nested (preserving our sanity).
207207
/// A [`FormatSpecError::PlaceholderRecursionExceeded`] will be raised while parsing in this case.
208208
/// ```python
209209
/// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error
210210
/// ```
211211
///
212-
/// When replacements are present in a format specification, we will parse them and
213-
/// store them in [`FormatSpec`] but will otherwise ignore them if they would introduce
214-
/// an invalid format specification at runtime.
212+
/// When placeholders are present in a format specification, parsing will return a [`DynamicFormatSpec`]
213+
/// and avoid attempting to parse any of the clauses. Otherwise, a [`StaticFormatSpec`] will be used.
215214
#[derive(Debug, PartialEq)]
216-
pub struct FormatSpec {
215+
pub enum FormatSpec {
216+
Static(StaticFormatSpec),
217+
Dynamic(DynamicFormatSpec),
218+
}
219+
220+
#[derive(Debug, PartialEq)]
221+
pub struct StaticFormatSpec {
217222
// Ex) `!s` in `'{!s}'`
218223
conversion: Option<FormatConversion>,
219224
// Ex) `*` in `'{:*^30}'`
@@ -232,8 +237,12 @@ pub struct FormatSpec {
232237
precision: Option<usize>,
233238
// Ex) `f` in `'{:+f}'`
234239
format_type: Option<FormatType>,
240+
}
241+
242+
#[derive(Debug, PartialEq)]
243+
pub struct DynamicFormatSpec {
235244
// Ex) `x` and `y` in `'{:*{x},{y}b}'`
236-
replacements: Vec<FormatPart>,
245+
pub placeholders: Vec<FormatPart>,
237246
}
238247

239248
#[derive(Copy, Clone, Debug, PartialEq, Default)]
@@ -318,43 +327,46 @@ fn parse_precision(text: &str) -> Result<(Option<usize>, &str), FormatSpecError>
318327
})
319328
}
320329

321-
/// Parses a format part within a format spec
322-
fn parse_nested_placeholder<'a>(
323-
parts: &mut Vec<FormatPart>,
324-
text: &'a str,
325-
) -> Result<&'a str, FormatSpecError> {
330+
/// Parses a placeholder format part within a format specification
331+
fn parse_nested_placeholder(text: &str) -> Result<Option<(FormatPart, &str)>, FormatSpecError> {
326332
match FormatString::parse_spec(text, AllowPlaceholderNesting::No) {
327-
// Not a nested replacement, OK
328-
Err(FormatParseError::MissingStartBracket) => Ok(text),
333+
// Not a nested placeholder, OK
334+
Err(FormatParseError::MissingStartBracket) => Ok(None),
329335
Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)),
330-
Ok((format_part, text)) => {
331-
parts.push(format_part);
332-
Ok(text)
336+
Ok((format_part, text)) => Ok(Some((format_part, text))),
337+
}
338+
}
339+
340+
/// Parse all placeholders in a format specification
341+
/// If no placeholders are present, an empty vector will be returned
342+
fn parse_nested_placeholders(mut text: &str) -> Result<Vec<FormatPart>, FormatSpecError> {
343+
let mut placeholders = vec![];
344+
while let Some(bracket) = text.find('{') {
345+
if let Some((format_part, rest)) = parse_nested_placeholder(&text[bracket..])? {
346+
text = rest;
347+
placeholders.push(format_part);
348+
} else {
349+
text = &text[bracket + 1..];
333350
}
334351
}
352+
Ok(placeholders)
335353
}
336354

337355
impl FormatSpec {
338356
pub fn parse(text: &str) -> Result<Self, FormatSpecError> {
339-
let mut replacements = vec![];
340-
// get_integer in CPython
341-
let text = parse_nested_placeholder(&mut replacements, text)?;
357+
let placeholders = parse_nested_placeholders(text)?;
358+
if !placeholders.is_empty() {
359+
return Ok(FormatSpec::Dynamic(DynamicFormatSpec { placeholders }));
360+
}
361+
342362
let (conversion, text) = FormatConversion::parse(text);
343-
let text = parse_nested_placeholder(&mut replacements, text)?;
344363
let (mut fill, mut align, text) = parse_fill_and_align(text);
345-
let text = parse_nested_placeholder(&mut replacements, text)?;
346364
let (sign, text) = FormatSign::parse(text);
347-
let text = parse_nested_placeholder(&mut replacements, text)?;
348365
let (alternate_form, text) = parse_alternate_form(text);
349-
let text = parse_nested_placeholder(&mut replacements, text)?;
350366
let (zero, text) = parse_zero(text);
351-
let text = parse_nested_placeholder(&mut replacements, text)?;
352367
let (width, text) = parse_number(text)?;
353-
let text = parse_nested_placeholder(&mut replacements, text)?;
354368
let (grouping_option, text) = FormatGrouping::parse(text);
355-
let text = parse_nested_placeholder(&mut replacements, text)?;
356369
let (precision, text) = parse_precision(text)?;
357-
let text = parse_nested_placeholder(&mut replacements, text)?;
358370

359371
let (format_type, _text) = if text.is_empty() {
360372
(None, text)
@@ -376,7 +388,7 @@ impl FormatSpec {
376388
align = align.or(Some(FormatAlign::AfterSign));
377389
}
378390

379-
Ok(FormatSpec {
391+
Ok(FormatSpec::Static(StaticFormatSpec {
380392
conversion,
381393
fill,
382394
align,
@@ -386,12 +398,7 @@ impl FormatSpec {
386398
grouping_option,
387399
precision,
388400
format_type,
389-
replacements,
390-
})
391-
}
392-
393-
pub fn replacements(&self) -> &[FormatPart] {
394-
return self.replacements.as_slice();
401+
}))
395402
}
396403
}
397404

@@ -437,7 +444,7 @@ impl std::fmt::Display for FormatParseError {
437444
std::write!(fmt, "unescaped start bracket in literal")
438445
}
439446
Self::PlaceholderRecursionExceeded => {
440-
std::write!(fmt, "multiply nested replacement not allowed")
447+
std::write!(fmt, "multiply nested placeholder not allowed")
441448
}
442449
Self::UnknownConversion => {
443450
std::write!(fmt, "unknown conversion")
@@ -730,7 +737,7 @@ mod tests {
730737

731738
#[test]
732739
fn test_width_only() {
733-
let expected = Ok(FormatSpec {
740+
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
734741
conversion: None,
735742
fill: None,
736743
align: None,
@@ -740,14 +747,13 @@ mod tests {
740747
grouping_option: None,
741748
precision: None,
742749
format_type: None,
743-
replacements: vec![],
744-
});
750+
}));
745751
assert_eq!(FormatSpec::parse("33"), expected);
746752
}
747753

748754
#[test]
749755
fn test_fill_and_width() {
750-
let expected = Ok(FormatSpec {
756+
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
751757
conversion: None,
752758
fill: Some('<'),
753759
align: Some(FormatAlign::Right),
@@ -757,45 +763,26 @@ mod tests {
757763
grouping_option: None,
758764
precision: None,
759765
format_type: None,
760-
replacements: vec![],
761-
});
766+
}));
762767
assert_eq!(FormatSpec::parse("<>33"), expected);
763768
}
764769

765770
#[test]
766771
fn test_format_part() {
767-
let expected = Ok(FormatSpec {
768-
conversion: None,
769-
fill: None,
770-
align: None,
771-
sign: None,
772-
alternate_form: false,
773-
width: None,
774-
grouping_option: None,
775-
precision: None,
776-
format_type: None,
777-
replacements: vec![FormatPart::Field {
772+
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
773+
placeholders: vec![FormatPart::Field {
778774
field_name: "x".to_string(),
779775
conversion_spec: None,
780776
format_spec: String::new(),
781777
}],
782-
});
778+
}));
783779
assert_eq!(FormatSpec::parse("{x}"), expected);
784780
}
785781

786782
#[test]
787-
fn test_format_parts() {
788-
let expected = Ok(FormatSpec {
789-
conversion: None,
790-
fill: None,
791-
align: None,
792-
sign: None,
793-
alternate_form: false,
794-
width: None,
795-
grouping_option: None,
796-
precision: None,
797-
format_type: None,
798-
replacements: vec![
783+
fn test_dynamic_format_spec() {
784+
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
785+
placeholders: vec![
799786
FormatPart::Field {
800787
field_name: "x".to_string(),
801788
conversion_spec: None,
@@ -812,34 +799,25 @@ mod tests {
812799
format_spec: String::new(),
813800
},
814801
],
815-
});
802+
}));
816803
assert_eq!(FormatSpec::parse("{x}{y:<2}{z}"), expected);
817804
}
818805

819806
#[test]
820-
fn test_format_part_with_others() {
821-
let expected = Ok(FormatSpec {
822-
conversion: None,
823-
fill: None,
824-
align: Some(FormatAlign::Left),
825-
sign: None,
826-
alternate_form: false,
827-
width: Some(20),
828-
grouping_option: None,
829-
precision: None,
830-
format_type: Some(FormatType::Binary),
831-
replacements: vec![FormatPart::Field {
807+
fn test_dynamic_format_spec_with_others() {
808+
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
809+
placeholders: vec![FormatPart::Field {
832810
field_name: "x".to_string(),
833811
conversion_spec: None,
834812
format_spec: String::new(),
835813
}],
836-
});
814+
}));
837815
assert_eq!(FormatSpec::parse("<{x}20b"), expected);
838816
}
839817

840818
#[test]
841819
fn test_all() {
842-
let expected = Ok(FormatSpec {
820+
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
843821
conversion: None,
844822
fill: Some('<'),
845823
align: Some(FormatAlign::Right),
@@ -849,8 +827,7 @@ mod tests {
849827
grouping_option: Some(FormatGrouping::Comma),
850828
precision: Some(11),
851829
format_type: Some(FormatType::Binary),
852-
replacements: vec![],
853-
});
830+
}));
854831
assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected);
855832
}
856833

@@ -877,7 +854,7 @@ mod tests {
877854
}
878855

879856
#[test]
880-
fn test_format_parse_nested_replacement() {
857+
fn test_format_parse_nested_placeholder() {
881858
let expected = Ok(FormatString {
882859
format_parts: vec![
883860
FormatPart::Literal("abcd".to_owned()),
@@ -966,7 +943,15 @@ mod tests {
966943
);
967944
assert_eq!(
968945
FormatSpec::parse("{}}"),
969-
Err(FormatSpecError::InvalidFormatType)
946+
// Note this should be an `InvalidFormatType` but we give up
947+
// on all other parsing validation when we see a placeholder
948+
Ok(FormatSpec::Dynamic(DynamicFormatSpec {
949+
placeholders: vec![FormatPart::Field {
950+
field_name: String::new(),
951+
conversion_spec: None,
952+
format_spec: String::new()
953+
}]
954+
}))
970955
);
971956
assert_eq!(
972957
FormatSpec::parse("{{x}}"),

0 commit comments

Comments
 (0)