Commit 4b4ae43

Fix incorrect linenos on fstring tokens with escaped newlines (#4423)
I don't think this can affect Black itself much (maybe for formatting ranges), but I ran into this with https://github.com/JelleZijlstra/lib2toast.
1 parent 7fa1faf commit 4b4ae43

3 files changed, +124 -1 lines changed

CHANGES.md (+3 lines)

@@ -39,6 +39,9 @@

 - Fix bug with Black incorrectly parsing empty lines with a backslash (#4343)

+- Fix incorrect line numbers in the tokenizer for certain tokens within f-strings
+  (#4423)
+
 ### Performance

 <!-- Changes that improve Black's performance. -->

src/blib2to3/pgen2/tokenize.py (+1 -1 line)

@@ -638,7 +638,7 @@ def generate_tokens(
         else:
             if is_fstring_start(token):
                 fstring_start, token = _split_fstring_start_and_middle(token)
-                fstring_start_epos = (lnum, spos[1] + len(fstring_start))
+                fstring_start_epos = (spos[0], spos[1] + len(fstring_start))
                 yield (
                     FSTRING_START,
                     fstring_start,
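In other words, the end position of an FSTRING_START token is now derived from the row where the token begins (spos[0]) rather than from lnum, which can already point at a later physical line once an escaped newline inside the string has been consumed. A minimal sketch of observing the corrected positions, reusing the tokeneater-callback API that the new test file below also exercises (it assumes the blib2to3 package shipped inside Black's source tree is importable; the dump_tokens helper name is made up for illustration):

# Minimal sketch (not part of the commit): print token positions via the same
# tokeneater callback used by tests/test_tokenize.py below.
import io

from blib2to3.pgen2 import token, tokenize


def dump_tokens(text: str) -> None:
    def tokeneater(
        type: int, string: str, start: tokenize.Coord, end: tokenize.Coord, line: str
    ) -> None:
        print(token.tok_name[type], repr(string), start, end)

    tokenize.tokenize(io.StringIO(text).readline, tokeneater)


# The f-string body continues over an escaped newline; with this fix the
# FSTRING_START token 'f"' ends at (1, 2), on the line where it starts,
# matching the last case in test_fstring below.
dump_tokens('f"x\\\n{a}"\n')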

tests/test_tokenize.py (new file, +120 lines)

"""Tests for the blib2to3 tokenizer."""

import io
import sys
import textwrap
from dataclasses import dataclass
from typing import List

import black
from blib2to3.pgen2 import token, tokenize


@dataclass
class Token:
    type: str
    string: str
    start: tokenize.Coord
    end: tokenize.Coord


def get_tokens(text: str) -> List[Token]:
    """Return the tokens produced by the tokenizer."""
    readline = io.StringIO(text).readline
    tokens: List[Token] = []

    def tokeneater(
        type: int, string: str, start: tokenize.Coord, end: tokenize.Coord, line: str
    ) -> None:
        tokens.append(Token(token.tok_name[type], string, start, end))

    tokenize.tokenize(readline, tokeneater)
    return tokens


def assert_tokenizes(text: str, tokens: List[Token]) -> None:
    """Assert that the tokenizer produces the expected tokens."""
    actual_tokens = get_tokens(text)
    assert actual_tokens == tokens


def test_simple() -> None:
    assert_tokenizes(
        "1",
        [Token("NUMBER", "1", (1, 0), (1, 1)), Token("ENDMARKER", "", (2, 0), (2, 0))],
    )
    assert_tokenizes(
        "'a'",
        [
            Token("STRING", "'a'", (1, 0), (1, 3)),
            Token("ENDMARKER", "", (2, 0), (2, 0)),
        ],
    )
    assert_tokenizes(
        "a",
        [Token("NAME", "a", (1, 0), (1, 1)), Token("ENDMARKER", "", (2, 0), (2, 0))],
    )


def test_fstring() -> None:
    assert_tokenizes(
        'f"x"',
        [
            Token("FSTRING_START", 'f"', (1, 0), (1, 2)),
            Token("FSTRING_MIDDLE", "x", (1, 2), (1, 3)),
            Token("FSTRING_END", '"', (1, 3), (1, 4)),
            Token("ENDMARKER", "", (2, 0), (2, 0)),
        ],
    )
    assert_tokenizes(
        'f"{x}"',
        [
            Token("FSTRING_START", 'f"', (1, 0), (1, 2)),
            Token("FSTRING_MIDDLE", "", (1, 2), (1, 2)),
            Token("LBRACE", "{", (1, 2), (1, 3)),
            Token("NAME", "x", (1, 3), (1, 4)),
            Token("RBRACE", "}", (1, 4), (1, 5)),
            Token("FSTRING_MIDDLE", "", (1, 5), (1, 5)),
            Token("FSTRING_END", '"', (1, 5), (1, 6)),
            Token("ENDMARKER", "", (2, 0), (2, 0)),
        ],
    )
    assert_tokenizes(
        'f"{x:y}"\n',
        [
            Token(type="FSTRING_START", string='f"', start=(1, 0), end=(1, 2)),
            Token(type="FSTRING_MIDDLE", string="", start=(1, 2), end=(1, 2)),
            Token(type="LBRACE", string="{", start=(1, 2), end=(1, 3)),
            Token(type="NAME", string="x", start=(1, 3), end=(1, 4)),
            Token(type="OP", string=":", start=(1, 4), end=(1, 5)),
            Token(type="FSTRING_MIDDLE", string="y", start=(1, 5), end=(1, 6)),
            Token(type="RBRACE", string="}", start=(1, 6), end=(1, 7)),
            Token(type="FSTRING_MIDDLE", string="", start=(1, 7), end=(1, 7)),
            Token(type="FSTRING_END", string='"', start=(1, 7), end=(1, 8)),
            Token(type="NEWLINE", string="\n", start=(1, 8), end=(1, 9)),
            Token(type="ENDMARKER", string="", start=(2, 0), end=(2, 0)),
        ],
    )
    assert_tokenizes(
        'f"x\\\n{a}"\n',
        [
            Token(type="FSTRING_START", string='f"', start=(1, 0), end=(1, 2)),
            Token(type="FSTRING_MIDDLE", string="x\\\n", start=(1, 2), end=(2, 0)),
            Token(type="LBRACE", string="{", start=(2, 0), end=(2, 1)),
            Token(type="NAME", string="a", start=(2, 1), end=(2, 2)),
            Token(type="RBRACE", string="}", start=(2, 2), end=(2, 3)),
            Token(type="FSTRING_MIDDLE", string="", start=(2, 3), end=(2, 3)),
            Token(type="FSTRING_END", string='"', start=(2, 3), end=(2, 4)),
            Token(type="NEWLINE", string="\n", start=(2, 4), end=(2, 5)),
            Token(type="ENDMARKER", string="", start=(3, 0), end=(3, 0)),
        ],
    )


# Run "echo some code | python tests/test_tokenize.py" to generate test cases.
if __name__ == "__main__":
    code = sys.stdin.read()
    tokens = get_tokens(code)
    text = f"assert_tokenizes({code!r}, {tokens!r})"
    text = black.format_str(text, mode=black.Mode())
    print(textwrap.indent(text, " "))
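The __main__ block doubles as a small test-case generator. A hedged sketch of driving it from Python rather than the shell pipeline mentioned in the comment above (the relative path and the sample snippet are illustrative assumptions, not part of the commit):

# Hypothetical driver: pipe a snippet into the generator via stdin, the same
# way "echo some code | python tests/test_tokenize.py" does.
import subprocess
import sys

snippet = 'f"{x}"\n'  # illustrative input
result = subprocess.run(
    [sys.executable, "tests/test_tokenize.py"],
    input=snippet,
    capture_output=True,
    text=True,
    check=True,
)
# Prints a Black-formatted assert_tokenizes(...) call ready to paste into a test.
print(result.stdout)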
