-
-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix python indentation #3382
Fix python indentation #3382
Changes from 6 commits
9d2ba86
740be6c
adb75d4
a642845
0323837
58f8f4c
4e0ac14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -192,13 +192,15 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { | |
|
||
/// Computes for node and all ancestors whether they are the first node on their line. | ||
/// The first entry in the return value represents the root node, the last one the node itself | ||
fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec<bool> { | ||
fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bool> { | ||
let mut first_in_line = Vec::new(); | ||
loop { | ||
if let Some(prev) = node.prev_sibling() { | ||
// If we insert a new line, the first node at/after the cursor is considered to be the first in its line | ||
let first = prev.end_position().row != node.start_position().row | ||
|| (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos); | ||
|| new_line_byte_pos.map_or(false, |byte_pos| { | ||
node.start_byte() >= byte_pos && prev.start_byte() < byte_pos | ||
}); | ||
first_in_line.push(Some(first)); | ||
} else { | ||
// Nodes that have no previous siblings are first in their line if and only if their parent is | ||
|
@@ -298,8 +300,21 @@ enum IndentScope { | |
Tail, | ||
} | ||
|
||
/// Execute the indent query. | ||
/// Returns for each node (identified by its id) a list of indent captures for that node. | ||
/// A capture from the indent query which does not define an indent but extends | ||
/// the range of a node. This is used before the indent is calculated. | ||
enum ExtendCapture { | ||
ExtendIndented, | ||
StopExtend, | ||
} | ||
|
||
/// The result of running a tree-sitter indent query. This stores for | ||
/// each node (identified by its ID) the relevant captures (already filtered | ||
/// by predicates). | ||
struct IndentQueryResult { | ||
indent_captures: HashMap<usize, Vec<IndentCapture>>, | ||
extend_captures: HashMap<usize, Vec<ExtendCapture>>, | ||
} | ||
|
||
fn query_indents( | ||
query: &Query, | ||
syntax: &Syntax, | ||
|
@@ -309,8 +324,9 @@ fn query_indents( | |
// Position of the (optional) newly inserted line break. | ||
// Given as (line, byte_pos) | ||
new_line_break: Option<(usize, usize)>, | ||
) -> HashMap<usize, Vec<IndentCapture>> { | ||
) -> IndentQueryResult { | ||
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new(); | ||
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new(); | ||
cursor.set_byte_range(range); | ||
// Iterate over all captures from the query | ||
for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { | ||
|
@@ -374,10 +390,24 @@ fn query_indents( | |
continue; | ||
} | ||
for capture in m.captures { | ||
let capture_type = query.capture_names()[capture.index as usize].as_str(); | ||
let capture_type = match capture_type { | ||
let capture_name = query.capture_names()[capture.index as usize].as_str(); | ||
let capture_type = match capture_name { | ||
"indent" => IndentCaptureType::Indent, | ||
"outdent" => IndentCaptureType::Outdent, | ||
"extend-indented" => { | ||
extend_captures | ||
.entry(capture.node.id()) | ||
.or_insert_with(|| Vec::with_capacity(1)) | ||
.push(ExtendCapture::ExtendIndented); | ||
continue; | ||
} | ||
"stop-extend" => { | ||
extend_captures | ||
.entry(capture.node.id()) | ||
.or_insert_with(|| Vec::with_capacity(1)) | ||
.push(ExtendCapture::StopExtend); | ||
continue; | ||
} | ||
_ => { | ||
// Ignore any unknown captures (these may be needed for predicates such as #match?) | ||
continue; | ||
|
@@ -420,7 +450,72 @@ fn query_indents( | |
.push(indent_capture); | ||
} | ||
} | ||
indent_captures | ||
IndentQueryResult { | ||
indent_captures, | ||
extend_captures, | ||
} | ||
} | ||
|
||
/// Handle extend queries. deepest_preceding is the deepest descendant of node that directly precedes the cursor position. | ||
/// Any ancestor of deepest_preceding which is also a descendant of node may be "extended". In that case, node will be updated, | ||
/// so that the indent computation starts with the correct syntax node. | ||
fn extend_nodes<'a>( | ||
node: &mut Node<'a>, | ||
deepest_preceding: Option<Node<'a>>, | ||
extend_captures: &HashMap<usize, Vec<ExtendCapture>>, | ||
text: RopeSlice, | ||
line: usize, | ||
tab_width: usize, | ||
) { | ||
if let Some(mut deepest_preceding) = deepest_preceding { | ||
let mut stop_extend = false; | ||
while deepest_preceding != *node { | ||
let mut extend_node = false; | ||
// This will be set to true if this node is captured, regardless of whether | ||
// it actually will be extended (e.g. because the cursor isn't indented | ||
// more than the node). | ||
let mut node_captured = false; | ||
if let Some(captures) = extend_captures.get(&deepest_preceding.id()) { | ||
for capture in captures { | ||
match capture { | ||
ExtendCapture::StopExtend => { | ||
stop_extend = true; | ||
} | ||
ExtendCapture::ExtendIndented => { | ||
node_captured = true; | ||
// We extend the node if | ||
// - the cursor is on the same line as the end of the node OR | ||
// - the line that the cursor is on is more indented than the | ||
// first line of the node | ||
if deepest_preceding.end_position().row == line { | ||
extend_node = true; | ||
} else { | ||
let cursor_indent = | ||
indent_level_for_line(text.line(line), tab_width); | ||
let node_indent = indent_level_for_line( | ||
text.line(deepest_preceding.start_position().row), | ||
tab_width, | ||
); | ||
if cursor_indent > node_indent { | ||
extend_node = true; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
// If we encountered some `StopExtend` capture before, we don't | ||
// extend the node even if we otherwise would | ||
if node_captured && stop_extend { | ||
stop_extend = false; | ||
} else if extend_node && !stop_extend { | ||
*node = deepest_preceding; | ||
break; | ||
} | ||
// This parent always exists since node is an ancestor of deepest_preceding | ||
deepest_preceding = deepest_preceding.parent().unwrap(); | ||
} | ||
} | ||
} | ||
|
||
/// Use the syntax tree to determine the indentation for a given position. | ||
|
@@ -459,40 +554,73 @@ fn query_indents( | |
/// }, | ||
/// ); | ||
/// ``` | ||
#[allow(clippy::too_many_arguments)] | ||
pub fn treesitter_indent_for_pos( | ||
query: &Query, | ||
syntax: &Syntax, | ||
indent_style: &IndentStyle, | ||
tab_width: usize, | ||
text: RopeSlice, | ||
line: usize, | ||
pos: usize, | ||
new_line: bool, | ||
) -> Option<String> { | ||
let byte_pos = text.char_to_byte(pos); | ||
// The innermost tree-sitter node which is considered for the indent | ||
// computation. It may change if some predeceding node is extended | ||
let mut node = syntax | ||
.tree() | ||
.root_node() | ||
.descendant_for_byte_range(byte_pos, byte_pos)?; | ||
let mut first_in_line = get_first_in_line(node, byte_pos, new_line); | ||
let new_line_break = if new_line { | ||
Some((line, byte_pos)) | ||
} else { | ||
None | ||
let (query_result, deepest_preceding) = { | ||
// The query range should intersect with all nodes directly preceding | ||
// the cursor in case one of them is extended. | ||
let mut deepest_preceding = None; // The deepest node preceding the cursor | ||
let mut tree_cursor = node.walk(); | ||
for child in node.children(&mut tree_cursor) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, this is probably just due to unfamiliarity, but I'm not understanding how you can have the cursor on a node with children that happen before the cursor. Could you please give an example of this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think my comments are somewhat misleading: By "cursor" I mean the position of the indent query (not the tree-sitter cursor). Usually, this will just be the position of the text cursor when starting a new line. I'll change the comments to clarify that. An example then looks like this: def func(): # <-- The function_definition node ends here
# <--- The indent is queried for this position The |
||
if child.byte_range().end <= byte_pos { | ||
deepest_preceding = Some(child); | ||
} | ||
} | ||
deepest_preceding = deepest_preceding.map(|mut prec| { | ||
// Get the deepest directly preceding node | ||
while prec.child_count() > 0 { | ||
prec = prec.child(prec.child_count() - 1).unwrap(); | ||
} | ||
prec | ||
}); | ||
let query_range = deepest_preceding | ||
.map(|prec| prec.byte_range().end - 1..byte_pos + 1) | ||
.unwrap_or(byte_pos..byte_pos + 1); | ||
|
||
crate::syntax::PARSER.with(|ts_parser| { | ||
let mut ts_parser = ts_parser.borrow_mut(); | ||
let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); | ||
let query_result = query_indents( | ||
query, | ||
syntax, | ||
&mut cursor, | ||
text, | ||
query_range, | ||
new_line.then(|| (line, byte_pos)), | ||
); | ||
ts_parser.cursors.push(cursor); | ||
(query_result, deepest_preceding) | ||
}) | ||
}; | ||
let query_result = crate::syntax::PARSER.with(|ts_parser| { | ||
let mut ts_parser = ts_parser.borrow_mut(); | ||
let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); | ||
let query_result = query_indents( | ||
query, | ||
syntax, | ||
&mut cursor, | ||
text, | ||
byte_pos..byte_pos + 1, | ||
new_line_break, | ||
); | ||
ts_parser.cursors.push(cursor); | ||
query_result | ||
}); | ||
let indent_captures = query_result.indent_captures; | ||
let extend_captures = query_result.extend_captures; | ||
|
||
// Check for extend captures, potentially changing the node that the indent calculation starts with | ||
extend_nodes( | ||
&mut node, | ||
deepest_preceding, | ||
&extend_captures, | ||
text, | ||
line, | ||
tab_width, | ||
); | ||
let mut first_in_line = get_first_in_line(node, new_line.then(|| byte_pos)); | ||
|
||
let mut result = Indentation::default(); | ||
// We always keep track of all the indent changes on one line, in order to only indent once | ||
|
@@ -504,7 +632,7 @@ pub fn treesitter_indent_for_pos( | |
// one entry for each ancestor of the node (which is what we iterate over) | ||
let is_first = *first_in_line.last().unwrap(); | ||
// Apply all indent definitions for this node | ||
if let Some(definitions) = query_result.get(&node.id()) { | ||
if let Some(definitions) = indent_captures.get(&node.id()) { | ||
for definition in definitions { | ||
match definition.scope { | ||
IndentScope::All => { | ||
|
@@ -550,7 +678,13 @@ pub fn treesitter_indent_for_pos( | |
node = parent; | ||
first_in_line.pop(); | ||
} else { | ||
result.add_line(&indent_for_line_below); | ||
// Only add the indentation for the line below if that line | ||
// is not after the line that the indentation is calculated for. | ||
if (node.start_position().row < line) | ||
|| (new_line && node.start_position().row == line && node.start_byte() < byte_pos) | ||
{ | ||
result.add_line(&indent_for_line_below); | ||
} | ||
result.add_line(&indent_for_line); | ||
break; | ||
} | ||
|
@@ -579,6 +713,7 @@ pub fn indent_for_newline( | |
query, | ||
syntax, | ||
indent_style, | ||
tab_width, | ||
text, | ||
line_before, | ||
line_before_end_pos, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious if we can fork tree-sitter-python and have the function_definition and other rules that end in
:
consume the newline. That way the cursor would belong to the indented node and I think we might not need the extend system. I'll try it outThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the behavior would need to be a bit more complicated: It doesn't just have to consume the newline but also any following indented lines. Additionally, this should not happen when the function ends with for example a
return
expression.If all this was implemented in the grammar, it would make this PR redundant but I'm not sure if it's worth the effort of forking the tree-sitter grammars for all languages that are affected by this.