Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple lexer for formatter #4922

Merged
merged 1 commit into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 35 additions & 45 deletions crates/ruff_python_formatter/src/comments/placement.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::comments::visitor::{CommentPlacement, DecoratedComment};
use crate::comments::CommentTextPosition;
use crate::trivia::find_first_non_trivia_character_in_range;
use crate::trivia::{SimpleTokenizer, TokenKind};
use ruff_newlines::StrExt;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::whitespace;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use std::cmp::Ordering;

Expand Down Expand Up @@ -521,14 +521,16 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// If the preceding is the node before the `colon`
// `while true:` The node before the `colon` is the `true` constant.
if preceding.ptr_eq(last_before_colon) {
let mut start = preceding.end();
while let Some((offset, c)) = find_first_non_trivia_character_in_range(
TextRange::new(start, following.start()),
let tokens = SimpleTokenizer::new(
locator.contents(),
) {
match c {
':' => {
if comment.slice().start() > offset {
TextRange::new(preceding.end(), following.start()),
)
.skip_trivia();

for token in tokens {
match token.kind() {
TokenKind::Colon => {
if comment.slice().start() > token.start() {
// Comment comes after the colon
// ```python
// while a: # comment
Expand All @@ -546,9 +548,8 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// ```
break;
}
')' => {
TokenKind::RParen => {
// Skip over any closing parentheses
start = offset + ')'.text_len();
}
_ => {
unreachable!("Only ')' or ':' should follow the condition")
Expand Down Expand Up @@ -652,21 +653,17 @@ fn handle_trailing_binary_expression_left_or_operator_comment<'a>(
return CommentPlacement::Default(comment);
}

let mut between_operands_range = TextRange::new(
let between_operands_range = TextRange::new(
binary_expression.left.end(),
binary_expression.right.start(),
);

let operator_offset = loop {
match find_first_non_trivia_character_in_range(between_operands_range, locator.contents()) {
// Skip over closing parens
Some((offset, ')')) => {
between_operands_range =
TextRange::new(offset + TextSize::new(1), between_operands_range.end());
}
Some((offset, _)) => break offset,
None => return CommentPlacement::Default(comment),
}
let mut tokens = SimpleTokenizer::new(locator.contents(), between_operands_range).skip_trivia();
let operator_offset = if let Some(non_r_paren) = tokens.find(|t| t.kind() != TokenKind::RParen)
{
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};

let comment_range = comment.slice().range();
Expand Down Expand Up @@ -805,29 +802,22 @@ fn find_pos_only_slash_offset(
between_arguments_range: TextRange,
locator: &Locator,
) -> Option<TextSize> {
// First find the comma separating the two arguments
find_first_non_trivia_character_in_range(between_arguments_range, locator.contents()).and_then(
|(comma_offset, comma)| {
debug_assert_eq!(comma, ',');

// Then find the position of the `/` operator
find_first_non_trivia_character_in_range(
TextRange::new(
comma_offset + TextSize::new(1),
between_arguments_range.end(),
),
locator.contents(),
)
.and_then(|(offset, c)| {
if c == '/' {
Some(offset)
} else {
debug_assert_eq!(c, ')');
None
}
})
},
)
let mut tokens =
SimpleTokenizer::new(locator.contents(), between_arguments_range).skip_trivia();

if let Some(comma) = tokens.next() {
debug_assert_eq!(comma.kind(), TokenKind::Comma);

if let Some(maybe_slash) = tokens.next() {
if maybe_slash.kind() == TokenKind::Slash {
return Some(maybe_slash.start());
}

debug_assert_eq!(maybe_slash.kind(), TokenKind::RParen);
}
}

None
}

/// Returns `true` if `right` is `Some` and `left` and `right` are referentially equal.
Expand Down
31 changes: 13 additions & 18 deletions crates/ruff_python_formatter/src/expression/parentheses.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::trivia::{
find_first_non_trivia_character_after, find_first_non_trivia_character_before,
};
use crate::trivia::{first_non_trivia_token, first_non_trivia_token_rev, Token, TokenKind};
use ruff_python_ast::node::AnyNodeRef;
use rustpython_parser::ast::Ranged;

pub(crate) trait NeedsParentheses {
fn needs_parentheses(&self, parenthesize: Parenthesize, source: &str) -> Parentheses;
Expand Down Expand Up @@ -73,21 +72,17 @@ pub enum Parentheses {
}

fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> bool {
use rustpython_parser::ast::Ranged;

debug_assert!(
expr.is_expression(),
"Should only be called for expressions"
);

// Search backwards to avoid ambiguity with `(a, )` and because it's faster
matches!(
find_first_non_trivia_character_after(expr.end(), contents),
Some((_, ')'))
)
// Search forwards to confirm that this is not a nested expression `(5 + d * 3)`
&& matches!(
find_first_non_trivia_character_before(expr.start(), contents),
Some((_, '('))
first_non_trivia_token(expr.end(), contents),
Some(Token {
kind: TokenKind::RParen,
..
})
) && matches!(
first_non_trivia_token_rev(expr.start(), contents),
Some(Token {
kind: TokenKind::LParen,
..
})
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
---
source: crates/ruff_python_formatter/src/trivia.rs
expression: test_case.tokenize_reverse()
---
[
Token {
kind: RParen,
range: 52..53,
},
Token {
kind: Other,
range: 51..52,
},
Token {
kind: Bogus,
range: 50..51,
},
Token {
kind: Bogus,
range: 49..50,
},
Token {
kind: Bogus,
range: 48..49,
},
Token {
kind: Bogus,
range: 47..48,
},
Token {
kind: Bogus,
range: 46..47,
},
Token {
kind: Bogus,
range: 45..46,
},
Token {
kind: Bogus,
range: 44..45,
},
Token {
kind: Bogus,
range: 43..44,
},
Token {
kind: Bogus,
range: 42..43,
},
Token {
kind: Bogus,
range: 41..42,
},
Token {
kind: Bogus,
range: 40..41,
},
Token {
kind: Bogus,
range: 39..40,
},
Token {
kind: Bogus,
range: 38..39,
},
Token {
kind: Bogus,
range: 37..38,
},
Token {
kind: Bogus,
range: 36..37,
},
Token {
kind: Bogus,
range: 35..36,
},
Token {
kind: Bogus,
range: 34..35,
},
Token {
kind: Bogus,
range: 33..34,
},
Token {
kind: Bogus,
range: 32..33,
},
Token {
kind: Bogus,
range: 31..32,
},
Token {
kind: Bogus,
range: 30..31,
},
Token {
kind: Bogus,
range: 29..30,
},
Token {
kind: Bogus,
range: 28..29,
},
Token {
kind: Bogus,
range: 27..28,
},
Token {
kind: Bogus,
range: 26..27,
},
Token {
kind: Bogus,
range: 25..26,
},
Token {
kind: Bogus,
range: 24..25,
},
Token {
kind: Bogus,
range: 23..24,
},
Token {
kind: Bogus,
range: 22..23,
},
Token {
kind: Bogus,
range: 21..22,
},
Token {
kind: Bogus,
range: 20..21,
},
Token {
kind: Bogus,
range: 19..20,
},
Token {
kind: Bogus,
range: 18..19,
},
Token {
kind: Bogus,
range: 17..18,
},
Token {
kind: Bogus,
range: 16..17,
},
Token {
kind: Bogus,
range: 15..16,
},
Token {
kind: Bogus,
range: 14..15,
},
Token {
kind: Bogus,
range: 13..14,
},
Token {
kind: Bogus,
range: 12..13,
},
Token {
kind: Bogus,
range: 11..12,
},
Token {
kind: Bogus,
range: 10..11,
},
Token {
kind: Bogus,
range: 9..10,
},
Token {
kind: Bogus,
range: 8..9,
},
Token {
kind: Bogus,
range: 7..8,
},
Token {
kind: Bogus,
range: 6..7,
},
Token {
kind: Bogus,
range: 5..6,
},
Token {
kind: Bogus,
range: 4..5,
},
Token {
kind: Bogus,
range: 3..4,
},
Token {
kind: Bogus,
range: 2..3,
},
Token {
kind: Bogus,
range: 1..2,
},
Token {
kind: Bogus,
range: 0..1,
},
]
Loading