Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions crates/ruff/resources/test/fixtures/pycodestyle/W605_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Same as `W605_0.py` but using f-strings instead.

#: W605:1:10
regex = f'\.png$'

#: W605:2:1
regex = f'''
\.png$
'''

#: W605:2:6
f(
f'\_'
)

#: W605:4:6
f"""
multi-line
literal
with \_ somewhere
in the middle
"""

#: W605:1:38
value = f'new line\nand invalid escape \_ here'


#: Okay
regex = fr'\.png$'
regex = f'\\.png$'
regex = fr'''
\.png$
'''
regex = fr'''
\\.png$
'''
s = f'\\'
regex = f'\w' # noqa
regex = f'''
\w
''' # noqa

regex = f'\\\_'
value = f'\{{1}}'
value = f'\{1}'
value = f'{1:\}'
value = f"{f"\{1}"}"
value = rf"{f"\{1}"}"

# Okay
value = rf'\{{1}}'
value = rf'\{1}'
value = rf'{1:\}'
value = f"{rf"\{1}"}"
16 changes: 8 additions & 8 deletions crates/ruff/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ pub(crate) fn check_tokens(

if settings.rules.enabled(Rule::InvalidEscapeSequence) {
for (tok, range) in tokens.iter().flatten() {
if tok.is_string() {
pycodestyle::rules::invalid_escape_sequence(
&mut diagnostics,
locator,
*range,
settings.rules.should_fix(Rule::InvalidEscapeSequence),
);
}
pycodestyle::rules::invalid_escape_sequence(
&mut diagnostics,
locator,
indexer,
tok,
*range,
settings.rules.should_fix(Rule::InvalidEscapeSequence),
);
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod tests {
#[test_case(Rule::BlankLineWithWhitespace, Path::new("W29.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_0.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
Expand Down
86 changes: 65 additions & 21 deletions crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use memchr::memchr_iter;

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_index::Indexer;
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

Expand Down Expand Up @@ -45,29 +46,32 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
pub(crate) fn invalid_escape_sequence(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
range: TextRange,
indexer: &Indexer,
token: &Tok,
token_range: TextRange,
autofix: bool,
) {
let text = locator.slice(range);

// Determine whether the string is single- or triple-quoted.
let Some(leading_quote) = leading_quote(text) else {
return;
};
let Some(trailing_quote) = trailing_quote(text) else {
return;
let token_source_code = match token {
Tok::FStringMiddle { value, is_raw } => {
if *is_raw {
return;
}
value.as_str()
}
Tok::String { kind, .. } => {
if kind.is_raw() {
return;
}
locator.slice(token_range)
}
_ => return,
};
let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];

if leading_quote.contains(['r', 'R']) {
return;
}

let mut contains_valid_escape_sequence = false;
let mut invalid_escape_sequence = Vec::new();

let mut prev = None;
let bytes = body.as_bytes();
let bytes = token_source_code.as_bytes();
for i in memchr_iter(b'\\', bytes) {
// If the previous character was also a backslash, skip.
if prev.is_some_and(|prev| prev == i - 1) {
Expand All @@ -77,9 +81,38 @@ pub(crate) fn invalid_escape_sequence(

prev = Some(i);

let Some(next_char) = body[i + 1..].chars().next() else {
let next_char = match token_source_code[i + 1..].chars().next() {
Some(next_char) => next_char,
None if token.is_f_string_middle() => {
// If we're at the end of a f-string middle token, the next character
// is actually emitted as a different token. For example,
//
// ```python
// f"\{1}"
// ```
//
// is lexed as `FStringMiddle('\\')` and `LBrace` (ignoring irrelevant
// tokens), so we need to check the next character in the source code.
//
// Now, if we're at the end of the f-string itself, the lexer wouldn't
// have emitted the `FStringMiddle` token in the first place. For example,
//
// ```python
// f"foo\"
// ```
//
// Here, there won't be any `FStringMiddle` because it's an unterminated
// f-string. This means that if there's a `FStringMiddle` token and we
// encounter a `\` character, then the next character is always going to
// be part of the f-string.
if let Some(next_char) = locator.after(token_range.end()).chars().next() {
next_char
} else {
continue;
}
}
// If we're at the end of the file, skip.
continue;
None => continue,
};

// If we're at the end of line, skip.
Expand Down Expand Up @@ -120,7 +153,7 @@ pub(crate) fn invalid_escape_sequence(
continue;
}

let location = range.start() + leading_quote.text_len() + TextSize::try_from(i).unwrap();
let location = token_range.start() + TextSize::try_from(i).unwrap();
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(next_char), range));
}
Expand All @@ -135,14 +168,25 @@ pub(crate) fn invalid_escape_sequence(
)));
}
} else {
let tok_start = if token.is_f_string_middle() {
// SAFETY: If this is a `FStringMiddle` token, then the indexer
// must have the f-string range.
indexer
.fstring_ranges()
.innermost(token_range.start())
.unwrap()
.start()
} else {
token_range.start()
};
// Turn into raw string.
for diagnostic in &mut invalid_escape_sequence {
// If necessary, add a space between any leading keyword (`return`, `yield`,
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
// `returnr"foo"` is not.
diagnostic.set_fix(Fix::automatic(Edit::insertion(
pad_start("r".to_string(), range.start(), locator),
range.start(),
pad_start("r".to_string(), tok_start, locator),
tok_start,
)));
}
}
Expand Down
Loading