diff --git a/crates/ruff_python_parser/resources/inline/err/backslash_continuation_indentation_error.py b/crates/ruff_python_parser/resources/inline/err/backslash_continuation_indentation_error.py new file mode 100644 index 0000000000000..e9b294f49fc96 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/backslash_continuation_indentation_error.py @@ -0,0 +1,4 @@ +if True: + 1 + \ + 2 diff --git a/crates/ruff_python_parser/resources/inline/ok/backslash_continuation_indentation.py b/crates/ruff_python_parser/resources/inline/ok/backslash_continuation_indentation.py new file mode 100644 index 0000000000000..f9b9d7500996b --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/backslash_continuation_indentation.py @@ -0,0 +1,7 @@ +if True: + \ + 1 + \ +2 +else:\ + 3 diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 8b4b3a061c47d..31f4342d75570 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -263,7 +263,35 @@ impl<'src> Lexer<'src> { self.token_range(), ))); } - indentation = Indentation::root(); + // test_ok backslash_continuation_indentation + // if True: + // \ + // 1 + // \ + // 2 + // else:\ + // 3 + + // test_err backslash_continuation_indentation_error + // if True: + // 1 + // \ + // 2 + + // > Indentation cannot be split over multiple physical lines using backslashes; + // > the whitespace up to the first backslash determines the indentation. + // > + // > https://docs.python.org/3/reference/lexical_analysis.html#indentation + // + // Skip whitespace after the continuation-line without accumulating it into + // `indentation`. However, if the backslash is at column 0 (no prior + // indentation), let the loop continue so the next line's whitespace is + // accumulated normally. 
+ // + // See also: https://github.com/python/cpython/issues/90249 + if indentation != Indentation::root() { + self.cursor.eat_while(is_python_whitespace); + } } // Form feed '\x0C' => { @@ -3061,4 +3089,71 @@ t"{(lambda x:{x})}" UnterminatedTripleQuotedString ); } + + #[test] + fn backslash_continuation_indentation() { + // The first `\` has 4 spaces before it which matches the indentation level at that point, + // so the whitespace before `2` is irrelevant and shouldn't produce an indentation error. + // Similarly, the second `\` is also at the same indentation level, so the `3` line is also + // valid. + let source = r"if True: + 1 + \ + 2 + \ +3 +else: + pass +" + .to_string(); + assert_snapshot!(lex_source(&source)); + } + + #[test] + fn backslash_continuation_at_root() { + // But, it's different when the backslash character itself is at the root indentation + // level. Then, the whitespace following it determines the indentation level of the next + // line, so `1` is indented with 4 spaces and `2` is indented with 8 spaces, and `3` is + // indented with 4 spaces, all of which are valid. + let source = r"if True: +\ + 1 + if True: +\ + 2 +else:\ + 3 +" + .to_string(); + assert_snapshot!(lex_source(&source)); + } + + #[test] + fn multiple_backslash_continuation() { + // It's only the first backslash character that determines the indentation level of the next + // line, so all the lines after the first `\` are indented with 4 spaces, and the remaining + // backslashes are just ignored and don't affect the indentation level. 
+ let source = r"if True: + 1 + \ + \ + \ + \ + 2 +" + .to_string(); + assert_snapshot!(lex_source(&source)); + } + + #[test] + fn backslash_continuation_mismatch_indentation() { + // Indentation doesn't match any previous indentation level + let source = r"if True: + 1 + \ + 2 +" + .to_string(); + assert_snapshot!(lex_invalid(&source, Mode::Module)); + } } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_at_root.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_at_root.snap new file mode 100644 index 0000000000000..4df1ec54926c3 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_at_root.snap @@ -0,0 +1,95 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(&source) +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + True, + 3..7, + ), + ( + Colon, + 7..8, + ), + ( + Newline, + 8..9, + ), + ( + Indent, + 9..15, + ), + ( + Int( + 1, + ), + 15..16, + ), + ( + Newline, + 16..17, + ), + ( + If, + 21..23, + ), + ( + True, + 24..28, + ), + ( + Colon, + 28..29, + ), + ( + Newline, + 29..30, + ), + ( + Indent, + 30..40, + ), + ( + Int( + 2, + ), + 40..41, + ), + ( + Newline, + 41..42, + ), + ( + Dedent, + 42..42, + ), + ( + Dedent, + 42..42, + ), + ( + Else, + 42..46, + ), + ( + Colon, + 46..47, + ), + ( + Int( + 3, + ), + 53..54, + ), + ( + Newline, + 54..55, + ), +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_indentation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_indentation.snap new file mode 100644 index 0000000000000..6639f9cbac551 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_indentation.snap @@ -0,0 +1,91 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs 
+expression: lex_source(&source) +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + True, + 3..7, + ), + ( + Colon, + 7..8, + ), + ( + Newline, + 8..9, + ), + ( + Indent, + 9..13, + ), + ( + Int( + 1, + ), + 13..14, + ), + ( + Newline, + 14..15, + ), + ( + Int( + 2, + ), + 29..30, + ), + ( + Newline, + 30..31, + ), + ( + Int( + 3, + ), + 37..38, + ), + ( + Newline, + 38..39, + ), + ( + Dedent, + 39..39, + ), + ( + Else, + 39..43, + ), + ( + Colon, + 43..44, + ), + ( + Newline, + 44..45, + ), + ( + Indent, + 45..49, + ), + ( + Pass, + 49..53, + ), + ( + Newline, + 53..54, + ), + ( + Dedent, + 54..54, + ), +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_mismatch_indentation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_mismatch_indentation.snap new file mode 100644 index 0000000000000..8ffbbe16f55e6 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__backslash_continuation_mismatch_indentation.snap @@ -0,0 +1,62 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: "lex_invalid(&source, Mode::Module)" +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + True, + 3..7, + ), + ( + Colon, + 7..8, + ), + ( + Newline, + 8..9, + ), + ( + Indent, + 9..13, + ), + ( + Int( + 1, + ), + 13..14, + ), + ( + Newline, + 14..15, + ), + ( + Unknown, + 15..23, + ), + ( + Int( + 2, + ), + 23..24, + ), + ( + Newline, + 24..25, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: IndentationError, + location: 15..23, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__multiple_backslash_continuation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__multiple_backslash_continuation.snap new file mode 100644 index 0000000000000..edcef8da028b6 --- /dev/null +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__multiple_backslash_continuation.snap @@ -0,0 +1,53 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(&source) +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + True, + 3..7, + ), + ( + Colon, + 7..8, + ), + ( + Newline, + 8..9, + ), + ( + Indent, + 9..13, + ), + ( + Int( + 1, + ), + 13..14, + ), + ( + Newline, + 14..15, + ), + ( + Int( + 2, + ), + 55..56, + ), + ( + Newline, + 56..57, + ), + ( + Dedent, + 57..57, + ), +] +``` diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@backslash_continuation_indentation_error.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@backslash_continuation_indentation_error.py.snap new file mode 100644 index 0000000000000..5694d581cc708 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@backslash_continuation_indentation_error.py.snap @@ -0,0 +1,77 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +--- +## AST + +``` +Module( + ModModule { + node_index: NodeIndex(None), + range: 0..29, + body: [ + If( + StmtIf { + node_index: NodeIndex(None), + range: 0..28, + test: BooleanLiteral( + ExprBooleanLiteral { + node_index: NodeIndex(None), + range: 3..7, + value: true, + }, + ), + body: [ + Expr( + StmtExpr { + node_index: NodeIndex(None), + range: 13..14, + value: NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 13..14, + value: Int( + 1, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + node_index: NodeIndex(None), + range: 27..28, + value: NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 27..28, + value: Int( + 2, + ), + }, + ), + }, + ), + ], + elif_else_clauses: [], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | if True: +2 | 1 +3 | / \ +4 | | 2 + | |____^ Syntax Error: Unexpected indentation + | + + + | +3 | \ +4 | 2 + | ^ Syntax Error: Expected a statement + | diff --git 
a/crates/ruff_python_parser/tests/snapshots/valid_syntax@backslash_continuation_indentation.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@backslash_continuation_indentation.py.snap new file mode 100644 index 0000000000000..08b0a93d074fc --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@backslash_continuation_indentation.py.snap @@ -0,0 +1,84 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +--- +## AST + +``` +Module( + ModModule { + node_index: NodeIndex(None), + range: 0..46, + body: [ + If( + StmtIf { + node_index: NodeIndex(None), + range: 0..45, + test: BooleanLiteral( + ExprBooleanLiteral { + node_index: NodeIndex(None), + range: 3..7, + value: true, + }, + ), + body: [ + Expr( + StmtExpr { + node_index: NodeIndex(None), + range: 23..24, + value: NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 23..24, + value: Int( + 1, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + node_index: NodeIndex(None), + range: 31..32, + value: NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 31..32, + value: Int( + 2, + ), + }, + ), + }, + ), + ], + elif_else_clauses: [ + ElifElseClause { + range: 33..45, + node_index: NodeIndex(None), + test: None, + body: [ + Expr( + StmtExpr { + node_index: NodeIndex(None), + range: 44..45, + value: NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 44..45, + value: Int( + 3, + ), + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +```