diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d2335a9b64f..72191df62a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,7 +53,7 @@ repos: # v8.2 has breaking changes. We work around them at runtime, but we need the newer stubs. - packaging >= 22.0 - platformdirs >= 2.1.0 - - pytokens >= 0.1.10 + - pytokens >= 0.3.0 - pytest - hypothesis - aiohttp >= 3.7.4 diff --git a/CHANGES.md b/CHANGES.md index 303d6849bcb..921fc3efdd8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,7 @@ - Enable base 3.14 support (#4804) +- Add support for the new Python 3.14 t-string syntax introduced by PEP 750 (#4805) ### Stable style diff --git a/pyproject.toml b/pyproject.toml index ed787d2422f..93c80aa7518 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ "packaging>=22.0", "pathspec>=0.9.0", "platformdirs>=2", - "pytokens>=0.1.10", + "pytokens>=0.3.0", "tomli>=1.1.0; python_version < '3.11'", "typing_extensions>=4.0.1; python_version < '3.11'", ] diff --git a/src/black/__init__.py b/src/black/__init__.py index 079e95cf386..b71c1a6bdbd 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1248,6 +1248,7 @@ def _format_str_once( for feature in { Feature.PARENTHESIZED_CONTEXT_MANAGERS, Feature.UNPARENTHESIZED_EXCEPT_TYPES, + Feature.T_STRINGS, } if supports_feature(versions, feature) } @@ -1364,6 +1365,8 @@ def get_features_used( # noqa: C901 for n in node.pre_order(): if n.type == token.FSTRING_START: features.add(Feature.F_STRINGS) + elif n.type == token.TSTRING_START: + features.add(Feature.T_STRINGS) elif ( n.type == token.RBRACE and n.parent is not None diff --git a/src/black/linegen.py b/src/black/linegen.py index c2fd5f6858e..cf171c12c62 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -38,7 +38,7 @@ WHITESPACE, Visitor, ensure_visible, - fstring_to_string, + fstring_tstring_to_string, get_annotation_type, has_sibling_with_type, is_arith_like, @@ -560,7 +560,22 @@ def 
visit_atom(self, node: Node) -> Iterator[Line]: def visit_fstring(self, node: Node) -> Iterator[Line]: # currently we don't want to format and split f-strings at all. - string_leaf = fstring_to_string(node) + string_leaf = fstring_tstring_to_string(node) + node.replace(string_leaf) + if "\\" in string_leaf.value and any( + "\\" in str(child) + for child in node.children + if child.type == syms.fstring_replacement_field + ): + # string normalization doesn't account for nested quotes, + # causing breakages. skip normalization when nested quotes exist + yield from self.visit_default(string_leaf) + return + yield from self.visit_STRING(string_leaf) + + def visit_tstring(self, node: Node) -> Iterator[Line]: + # currently we don't want to format and split t-strings at all. + string_leaf = fstring_tstring_to_string(node) node.replace(string_leaf) if "\\" in string_leaf.value and any( "\\" in str(child) diff --git a/src/black/lines.py b/src/black/lines.py index 436f5ded64d..1d947ad37b5 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -64,8 +64,8 @@ def append( """ has_value = ( leaf.type in BRACKETS - # empty fstring-middles must not be truncated - or leaf.type == token.FSTRING_MIDDLE + # empty fstring and tstring middles must not be truncated + or leaf.type in (token.FSTRING_MIDDLE, token.TSTRING_MIDDLE) or bool(leaf.value.strip()) ) if not has_value: diff --git a/src/black/mode.py b/src/black/mode.py index 543358a3482..c7be0466f0b 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -52,9 +52,10 @@ class Feature(Enum): DEBUG_F_STRINGS = 16 PARENTHESIZED_CONTEXT_MANAGERS = 17 TYPE_PARAMS = 18 - FSTRING_PARSING = 19 + # FSTRING_PARSING = 19 # unused TYPE_PARAM_DEFAULTS = 20 UNPARENTHESIZED_EXCEPT_TYPES = 21 + T_STRINGS = 22 FORCE_OPTIONAL_PARENTHESES = 50 # __future__ flags @@ -165,7 +166,6 @@ class Feature(Enum): Feature.EXCEPT_STAR, Feature.VARIADIC_GENERICS, Feature.TYPE_PARAMS, - Feature.FSTRING_PARSING, }, TargetVersion.PY313: { Feature.F_STRINGS, @@ 
-185,7 +185,6 @@ class Feature(Enum): Feature.EXCEPT_STAR, Feature.VARIADIC_GENERICS, Feature.TYPE_PARAMS, - Feature.FSTRING_PARSING, Feature.TYPE_PARAM_DEFAULTS, }, TargetVersion.PY314: { @@ -206,9 +205,9 @@ class Feature(Enum): Feature.EXCEPT_STAR, Feature.VARIADIC_GENERICS, Feature.TYPE_PARAMS, - Feature.FSTRING_PARSING, Feature.TYPE_PARAM_DEFAULTS, Feature.UNPARENTHESIZED_EXCEPT_TYPES, + Feature.T_STRINGS, }, } diff --git a/src/black/nodes.py b/src/black/nodes.py index c6e618acadb..76d565c90dc 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -140,6 +140,8 @@ STANDALONE_COMMENT, token.FSTRING_MIDDLE, token.FSTRING_END, + token.TSTRING_MIDDLE, + token.TSTRING_END, token.BANG, } @@ -207,7 +209,10 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no }: return NO - if t == token.LBRACE and p.type == syms.fstring_replacement_field: + if t == token.LBRACE and p.type in ( + syms.fstring_replacement_field, + syms.tstring_replacement_field, + ): return NO prev = leaf.prev_sibling @@ -395,7 +400,6 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: return NO - # TODO: add fstring here? 
elif t in {token.NAME, token.NUMBER, token.STRING}: return NO @@ -789,8 +793,8 @@ def is_fstring(node: Node) -> bool: return node.type == syms.fstring -def fstring_to_string(node: Node) -> Leaf: - """Converts an fstring node back to a string node.""" +def fstring_tstring_to_string(node: Node) -> Leaf: + """Converts an fstring or tstring node back to a string node.""" string_without_prefix = str(node)[len(node.prefix) :] string_leaf = Leaf(token.STRING, string_without_prefix, prefix=node.prefix) string_leaf.lineno = node.get_lineno() or 0 @@ -800,7 +804,7 @@ def fstring_to_string(node: Node) -> Leaf: def is_multiline_string(node: LN) -> bool: """Return True if `leaf` is a multiline string that actually spans many lines.""" if isinstance(node, Node) and is_fstring(node): - leaf = fstring_to_string(node) + leaf = fstring_tstring_to_string(node) elif isinstance(node, Leaf): leaf = node else: diff --git a/src/black/strings.py b/src/black/strings.py index 7e47f13062a..2dac4260c06 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -11,7 +11,7 @@ from black._width_table import WIDTH_TABLE from blib2to3.pytree import Leaf -STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters. +STRING_PREFIX_CHARS: Final = "fturbFTURB" # All possible string prefix characters. STRING_PREFIX_RE: Final = re.compile( r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL ) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 406a21f764d..286b762ef05 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -163,7 +163,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | (STRING | fstring)+ | '.' '.' '.') + NAME | NUMBER | (STRING | fstring | tstring)+ | '.' '.' 
'.') listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) lambdef: 'lambda' [varargslist] ':' test @@ -259,3 +259,8 @@ fstring: FSTRING_START fstring_middle* FSTRING_END fstring_middle: fstring_replacement_field | FSTRING_MIDDLE fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}' fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field + +tstring: TSTRING_START tstring_middle* TSTRING_END +tstring_middle: tstring_replacement_field | TSTRING_MIDDLE +tstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' tstring_format_spec* ] '}' +tstring_format_spec: TSTRING_MIDDLE | tstring_replacement_field diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index 056fab2127b..7fb305d31be 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -168,9 +168,13 @@ def parse_tokens(self, tokens: Iterable[TokenInfo], debug: bool = False) -> NL: if type in {token.INDENT, token.DEDENT}: prefix = _prefix lineno, column = end - # FSTRING_MIDDLE is the only token that can end with a newline, and - # `end` will point to the next line. For that case, don't increment lineno. + # FSTRING_MIDDLE and TSTRING_MIDDLE are the only tokens that can end with a + # newline, and `end` will point to the next line. For that case, don't + # increment lineno.
+ if value.endswith("\n") and type not in ( + token.FSTRING_MIDDLE, + token.TSTRING_MIDDLE, + ): lineno += 1 column = 0 else: diff --git a/src/blib2to3/pgen2/token.py b/src/blib2to3/pgen2/token.py index 10c7c63bfe9..8b531ee5c64 100644 --- a/src/blib2to3/pgen2/token.py +++ b/src/blib2to3/pgen2/token.py @@ -70,7 +70,10 @@ FSTRING_MIDDLE: Final = 61 FSTRING_END: Final = 62 BANG: Final = 63 -N_TOKENS: Final = 64 +TSTRING_START: Final = 64 +TSTRING_MIDDLE: Final = 65 +TSTRING_END: Final = 66 +N_TOKENS: Final = 67 NT_OFFSET: Final = 256 # --end constants-- diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 375d1773397..2109757bb5e 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -38,7 +38,6 @@ COMMENT, DEDENT, ENDMARKER, - ERRORTOKEN, FSTRING_END, FSTRING_MIDDLE, FSTRING_START, @@ -49,6 +48,9 @@ NUMBER, OP, STRING, + TSTRING_END, + TSTRING_MIDDLE, + TSTRING_START, tok_name, ) @@ -91,6 +93,9 @@ TokenType.fstring_start: FSTRING_START, TokenType.fstring_middle: FSTRING_MIDDLE, TokenType.fstring_end: FSTRING_END, + TokenType.tstring_start: TSTRING_START, + TokenType.tstring_middle: TSTRING_MIDDLE, + TokenType.tstring_end: TSTRING_END, TokenType.endmarker: ENDMARKER, } @@ -186,6 +191,9 @@ def tokenize(source: str, grammar: Optional[Grammar] = None) -> Iterator[TokenIn source_line, ) else: + token_type = TOKEN_TYPE_MAP.get(token.type) + if token_type is None: + raise ValueError(f"Unknown token type: {token.type!r}") yield ( TOKEN_TYPE_MAP[token.type], token_str, diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py index 70a5684bb07..7fd701ff426 100644 --- a/src/blib2to3/pygram.py +++ b/src/blib2to3/pygram.py @@ -126,6 +126,10 @@ class _python_symbols(Symbols): tname_star: int trailer: int try_stmt: int + tstring: int + tstring_format_spec: int + tstring_middle: int + tstring_replacement_field: int type_stmt: int typedargslist: int typeparam: int diff --git a/tests/data/cases/pep_750.py 
b/tests/data/cases/pep_750.py new file mode 100644 index 00000000000..ca992116586 --- /dev/null +++ b/tests/data/cases/pep_750.py @@ -0,0 +1,83 @@ +# flags: --minimum-version=3.14 +x = t"foo" +x = t'foo {{ {2 + 2}bar {{ baz' + +x = t"foo {f'abc'} bar" + +x = t"""foo {{ a + foo {2 + 2}bar {{ baz + + x = f"foo {{ { + 2 + 2 # comment + }bar" + + {{ baz + + }} buzz + + {print("abc" + "def" +)} +abc""" + +t'{(abc:=10)}' + +t'''This is a really long string, but just make sure that you reflow tstrings { + 2+2:d +}''' +t'This is a really long string, but just make sure that you reflow tstrings correctly {2+2:d}' + +t"{ 2 + 2 = }" + +t'{ +X +!r +}' + +tr'\{{\}}' + +t''' + WITH {f''' + {1}_cte AS ()'''} +''' + +# output +x = t"foo" +x = t"foo {{ {2 + 2}bar {{ baz" + +x = t"foo {f'abc'} bar" + +x = t"""foo {{ a + foo {2 + 2}bar {{ baz + + x = f"foo {{ { + 2 + 2 # comment + }bar" + + {{ baz + + }} buzz + + {print("abc" + "def" +)} +abc""" + +t"{(abc:=10)}" + +t"""This is a really long string, but just make sure that you reflow tstrings { + 2+2:d +}""" +t"This is a really long string, but just make sure that you reflow tstrings correctly {2+2:d}" + +t"{ 2 + 2 = }" + +t"{ +X +!r +}" + +rt"\{{\}}" + +t""" + WITH {f''' + {1}_cte AS ()'''} +""" diff --git a/tests/test_black.py b/tests/test_black.py index 291dc01421e..6f7052464ce 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -898,6 +898,9 @@ def test_get_features_used(self) -> None: self.check_features_used( "with ((a, ((b as c)))): pass", {Feature.PARENTHESIZED_CONTEXT_MANAGERS} ) + self.check_features_used( + "x = t'foo {f'bar'}'", {Feature.T_STRINGS, Feature.F_STRINGS} + ) def check_features_used(self, source: str, expected: set[Feature]) -> None: node = black.lib2to3_parse(source)