Skip to content

Commit

Permalink
Simple lexer for formatter
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Jun 8, 2023
1 parent 467df23 commit 153fd8c
Show file tree
Hide file tree
Showing 11 changed files with 1,145 additions and 182 deletions.
80 changes: 35 additions & 45 deletions crates/ruff_python_formatter/src/comments/placement.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::comments::visitor::{CommentPlacement, DecoratedComment};
use crate::comments::CommentTextPosition;
use crate::trivia::find_first_non_trivia_character_in_range;
use crate::trivia::{SimpleTokenizer, TokenKind};
use ruff_newlines::StrExt;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::whitespace;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use std::cmp::Ordering;

Expand Down Expand Up @@ -521,14 +521,16 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// If the preceding is the node before the `colon`
// `while true:` The node before the `colon` is the `true` constant.
if preceding.ptr_eq(last_before_colon) {
let mut start = preceding.end();
while let Some((offset, c)) = find_first_non_trivia_character_in_range(
TextRange::new(start, following.start()),
let tokens = SimpleTokenizer::new(
locator.contents(),
) {
match c {
':' => {
if comment.slice().start() > offset {
TextRange::new(preceding.end(), following.start()),
)
.skip_trivia();

for token in tokens {
match token.kind() {
TokenKind::Colon => {
if comment.slice().start() > token.start() {
// Comment comes after the colon
// ```python
// while a: # comment
Expand All @@ -546,9 +548,8 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// ```
break;
}
')' => {
TokenKind::RParen => {
// Skip over any closing parentheses
start = offset + ')'.text_len();
}
_ => {
unreachable!("Only ')' or ':' should follow the condition")
Expand Down Expand Up @@ -652,21 +653,17 @@ fn handle_trailing_binary_expression_left_or_operator_comment<'a>(
return CommentPlacement::Default(comment);
}

let mut between_operands_range = TextRange::new(
let between_operands_range = TextRange::new(
binary_expression.left.end(),
binary_expression.right.start(),
);

let operator_offset = loop {
match find_first_non_trivia_character_in_range(between_operands_range, locator.contents()) {
// Skip over closing parens
Some((offset, ')')) => {
between_operands_range =
TextRange::new(offset + TextSize::new(1), between_operands_range.end());
}
Some((offset, _)) => break offset,
None => return CommentPlacement::Default(comment),
}
let mut tokens = SimpleTokenizer::new(locator.contents(), between_operands_range).skip_trivia();
let operator_offset = if let Some(non_r_paren) = tokens.find(|t| t.kind() != TokenKind::RParen)
{
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};

let comment_range = comment.slice().range();
Expand Down Expand Up @@ -805,29 +802,22 @@ fn find_pos_only_slash_offset(
between_arguments_range: TextRange,
locator: &Locator,
) -> Option<TextSize> {
// First find the comma separating the two arguments
find_first_non_trivia_character_in_range(between_arguments_range, locator.contents()).and_then(
|(comma_offset, comma)| {
debug_assert_eq!(comma, ',');

// Then find the position of the `/` operator
find_first_non_trivia_character_in_range(
TextRange::new(
comma_offset + TextSize::new(1),
between_arguments_range.end(),
),
locator.contents(),
)
.and_then(|(offset, c)| {
if c == '/' {
Some(offset)
} else {
debug_assert_eq!(c, ')');
None
}
})
},
)
let mut tokens =
SimpleTokenizer::new(locator.contents(), between_arguments_range).skip_trivia();

if let Some(comma) = tokens.next() {
debug_assert_eq!(comma.kind(), TokenKind::Comma);

if let Some(maybe_slash) = tokens.next() {
if maybe_slash.kind() == TokenKind::Slash {
return Some(maybe_slash.start());
}

debug_assert_eq!(maybe_slash.kind(), TokenKind::RParen);
}
}

None
}

/// Returns `true` if `right` is `Some` and `left` and `right` are referentially equal.
Expand Down
31 changes: 13 additions & 18 deletions crates/ruff_python_formatter/src/expression/parentheses.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::trivia::{
find_first_non_trivia_character_after, find_first_non_trivia_character_before,
};
use crate::trivia::{first_non_trivia_token, first_non_trivia_token_rev, Token, TokenKind};
use ruff_python_ast::node::AnyNodeRef;
use rustpython_parser::ast::Ranged;

pub(crate) trait NeedsParentheses {
fn needs_parentheses(&self, parenthesize: Parenthesize, source: &str) -> Parentheses;
Expand Down Expand Up @@ -73,21 +72,17 @@ pub enum Parentheses {
}

fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> bool {
use rustpython_parser::ast::Ranged;

debug_assert!(
expr.is_expression(),
"Should only be called for expressions"
);

// Search backwards to avoid ambiguity with `(a, )` and because it's faster
matches!(
find_first_non_trivia_character_after(expr.end(), contents),
Some((_, ')'))
)
// Search forwards to confirm that this is not a nested expression `(5 + d * 3)`
&& matches!(
find_first_non_trivia_character_before(expr.start(), contents),
Some((_, '('))
first_non_trivia_token(expr.end(), contents),
Some(Token {
kind: TokenKind::RParen,
..
})
) && matches!(
first_non_trivia_token_rev(expr.start(), contents),
Some(Token {
kind: TokenKind::LParen,
..
})
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
---
source: crates/ruff_python_formatter/src/trivia.rs
expression: test_case.tokenize_reverse()
---
[
Token {
kind: RParen,
range: 52..53,
},
Token {
kind: Other,
range: 51..52,
},
Token {
kind: Bogus,
range: 50..51,
},
Token {
kind: Bogus,
range: 49..50,
},
Token {
kind: Bogus,
range: 48..49,
},
Token {
kind: Bogus,
range: 47..48,
},
Token {
kind: Bogus,
range: 46..47,
},
Token {
kind: Bogus,
range: 45..46,
},
Token {
kind: Bogus,
range: 44..45,
},
Token {
kind: Bogus,
range: 43..44,
},
Token {
kind: Bogus,
range: 42..43,
},
Token {
kind: Bogus,
range: 41..42,
},
Token {
kind: Bogus,
range: 40..41,
},
Token {
kind: Bogus,
range: 39..40,
},
Token {
kind: Bogus,
range: 38..39,
},
Token {
kind: Bogus,
range: 37..38,
},
Token {
kind: Bogus,
range: 36..37,
},
Token {
kind: Bogus,
range: 35..36,
},
Token {
kind: Bogus,
range: 34..35,
},
Token {
kind: Bogus,
range: 33..34,
},
Token {
kind: Bogus,
range: 32..33,
},
Token {
kind: Bogus,
range: 31..32,
},
Token {
kind: Bogus,
range: 30..31,
},
Token {
kind: Bogus,
range: 29..30,
},
Token {
kind: Bogus,
range: 28..29,
},
Token {
kind: Bogus,
range: 27..28,
},
Token {
kind: Bogus,
range: 26..27,
},
Token {
kind: Bogus,
range: 25..26,
},
Token {
kind: Bogus,
range: 24..25,
},
Token {
kind: Bogus,
range: 23..24,
},
Token {
kind: Bogus,
range: 22..23,
},
Token {
kind: Bogus,
range: 21..22,
},
Token {
kind: Bogus,
range: 20..21,
},
Token {
kind: Bogus,
range: 19..20,
},
Token {
kind: Bogus,
range: 18..19,
},
Token {
kind: Bogus,
range: 17..18,
},
Token {
kind: Bogus,
range: 16..17,
},
Token {
kind: Bogus,
range: 15..16,
},
Token {
kind: Bogus,
range: 14..15,
},
Token {
kind: Bogus,
range: 13..14,
},
Token {
kind: Bogus,
range: 12..13,
},
Token {
kind: Bogus,
range: 11..12,
},
Token {
kind: Bogus,
range: 10..11,
},
Token {
kind: Bogus,
range: 9..10,
},
Token {
kind: Bogus,
range: 8..9,
},
Token {
kind: Bogus,
range: 7..8,
},
Token {
kind: Bogus,
range: 6..7,
},
Token {
kind: Bogus,
range: 5..6,
},
Token {
kind: Bogus,
range: 4..5,
},
Token {
kind: Bogus,
range: 3..4,
},
Token {
kind: Bogus,
range: 2..3,
},
Token {
kind: Bogus,
range: 1..2,
},
Token {
kind: Bogus,
range: 0..1,
},
]
Loading

0 comments on commit 153fd8c

Please sign in to comment.