From d9bf2977b52b3bdc5c600534e9d5ab4e0779501d Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sat, 27 Apr 2024 15:52:13 +0200 Subject: [PATCH 1/2] improve match bracket matching --- helix-core/src/match_brackets.rs | 123 ++++++++++---------- helix-term/tests/test/commands/movement.rs | 126 +++++++++++++++++++++ 2 files changed, 187 insertions(+), 62 deletions(-) diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index 95d6a3dc4794..0e08c4898c2d 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -75,60 +75,64 @@ fn find_pair( ) -> Option { let pos = doc.char_to_byte(pos_); - let root = syntax.tree_for_byte_range(pos, pos + 1).root_node(); - let mut node = root.descendant_for_byte_range(pos, pos + 1)?; + let root = syntax.tree_for_byte_range(pos, pos).root_node(); + let mut node = root.descendant_for_byte_range(pos, pos)?; loop { - if node.is_named() { - let (start_byte, end_byte) = surrounding_bytes(doc, &node)?; - let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte)); - - if is_valid_pair_on_pos(doc, start_char, end_char) { - if end_byte == pos { - return Some(start_char); - } - - // We return the end char if the cursor is either on the start char - // or at some arbitrary position between start and end char. - if traverse_parents || start_byte == pos { - return Some(end_char); + if node.is_named() && node.child_count() >= 2 { + let open = node.child(0).unwrap(); + let close = node.child(node.child_count() - 1).unwrap(); + + if let (Some((start_pos, open)), Some((end_pos, close))) = + (as_char(doc, &open), as_char(doc, &close)) + { + if PAIRS.contains(&(open, close)) { + if start_pos == pos_ { + return Some(start_pos); + } + + // We return the end char if the cursor is either on the start char + // or at some arbitrary position between start and end char. + if traverse_parents || end_pos == pos_ { + return Some(end_pos); + } } } } // this node itselt wasn't a pair but maybe its siblings are - // check if we are *on* the pair (special cased so we don't look - // at the current node twice and to jump to the start on that case) - if let Some(open) = as_close_pair(doc, &node) { - if let Some(pair_start) = find_pair_end(doc, node.prev_sibling(), open, Backward) { + if let Some((start_char, end_char)) = as_close_pair(doc, &node) { + if let Some(pair_start) = + find_pair_end(doc, node.prev_sibling(), start_char, end_char, Backward) + { return Some(pair_start); } } - - if !traverse_parents { - // check if we are *on* the opening pair (special cased here as - // an opptimization since we only care about bracket on the cursor - // here) - if let Some(close) = as_open_pair(doc, &node) { - if let Some(pair_end) = find_pair_end(doc, node.next_sibling(), close, Forward) { - return Some(pair_end); - } - } - if node.is_named() { - break; + if let Some((start_char, end_char)) = as_open_pair(doc, &node) { + if let Some(pair_end) = + find_pair_end(doc, node.next_sibling(), start_char, end_char, Forward) + { + return Some(pair_end); } } - for close in - iter::successors(node.next_sibling(), |node| node.next_sibling()).take(MATCH_LIMIT) - { - let Some(open) = as_close_pair(doc, &close) else { - continue; - }; - if find_pair_end(doc, Some(node), open, Backward).is_some() { - return doc.try_byte_to_char(close.start_byte()).ok(); + if traverse_parents { + for sibling in + iter::successors(node.next_sibling(), |node| node.next_sibling()).take(MATCH_LIMIT) + { + let Some((start_char, end_char)) = as_close_pair(doc, &sibling) else { + continue; + }; + if find_pair_end(doc, sibling.prev_sibling(), start_char, end_char, Backward) + .is_some() + { + return doc.try_byte_to_char(sibling.start_byte()).ok(); + } } + } else if node.is_named() { + break; } + let Some(parent) = node.parent() else { break; }; @@ -241,29 +245,13 @@ pub fn is_valid_pair(ch: char) -> bool { PAIRS.iter().any(|(l, r)| *l == ch || *r == ch) } -fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool { - PAIRS.contains(&(doc.char(start_char), doc.char(end_char))) -} - -fn surrounding_bytes(doc: RopeSlice, node: &Node) -> Option<(usize, usize)> { - let len = doc.len_bytes(); - - let start_byte = node.start_byte(); - let end_byte = node.end_byte().saturating_sub(1); - - if start_byte >= len || end_byte >= len { - return None; - } - - Some((start_byte, end_byte)) -} - /// Tests if this node is a pair close char and returns the expected open char -fn as_close_pair(doc: RopeSlice, node: &Node) -> Option { +/// and close char contained in this node +fn as_close_pair(doc: RopeSlice, node: &Node) -> Option<(char, char)> { let close = as_char(doc, node)?.1; PAIRS .iter() - .find_map(|&(open, close_)| (close_ == close).then_some(open)) + .find_map(|&(open, close_)| (close_ == close).then_some((close, open))) } /// Checks if `node` or its siblings (at most MATCH_LIMIT nodes) is the specified closing char @@ -274,6 +262,7 @@ fn as_close_pair(doc: RopeSlice, node: &Node) -> Option { fn find_pair_end( doc: RopeSlice, node: Option, + start_char: char, end_char: char, direction: Direction, ) -> Option { @@ -281,20 +270,30 @@ fn find_pair_end( Forward => Node::next_sibling, Backward => Node::prev_sibling, }; + let mut depth = 0; iter::successors(node, advance) .take(MATCH_LIMIT) .find_map(|node| { let (pos, c) = as_char(doc, &node)?; - (end_char == c).then_some(pos) + if c == end_char { + if depth == 0 { + return Some(pos); + } + depth -= 1; + } else if c == start_char { + depth += 1; + } + None }) } -/// Tests if this node is a pair close char and returns the expected open char -fn as_open_pair(doc: RopeSlice, node: &Node) -> Option { +/// Tests if this node is a pair open char and returns the expected close char +/// and open char contained in this node +fn as_open_pair(doc: RopeSlice, node: &Node) -> Option<(char, char)> { let open = as_char(doc, node)?.1; PAIRS .iter() - .find_map(|&(open_, close)| (open_ == open).then_some(close)) + .find_map(|&(open_, close)| (open_ == open).then_some((open, close))) } /// If node is a single char return it (and its char position) diff --git a/helix-term/tests/test/commands/movement.rs b/helix-term/tests/test/commands/movement.rs index b2471f6359bb..30673ee3d36a 100644 --- a/helix-term/tests/test/commands/movement.rs +++ b/helix-term/tests/test/commands/movement.rs @@ -806,3 +806,129 @@ async fn test_select_prev_sibling() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn match_bracket() -> anyhow::Result<()> { + let rust_tests = vec![ + // fwd + ( + indoc! {r##" + fn foo(x: usize) -> usize { #[x|]# + 1 } + "##}, + "mm", + indoc! {r##" + fn foo(x: usize) -> usize { x + 1 #[}|]# + "##}, + ), + // backward + ( + indoc! {r##" + fn foo(x: usize) -> usize { #[x|]# + 1 } + "##}, + "mmmm", + indoc! {r##" + fn foo(x: usize) -> usize #[{|]# x + 1 } + "##}, + ), + // avoid false positive inside string literal + ( + indoc! {r##" + fn foo() -> &'static str { "(hello#[ |]#world)" } + "##}, + "mm", + indoc! {r##" + fn foo() -> &'static str { "(hello world)#["|]# } + "##}, + ), + // make sure matching on quotes works + ( + indoc! {r##" + fn foo() -> &'static str { "(hello#[ |]#world)" } + "##}, + "mm", + indoc! {r##" + fn foo() -> &'static str { "(hello world)#["|]# } + "##}, + ), + // .. on both ends + ( + indoc! {r##" + fn foo() -> &'static str { "(hello#[ |]#world)" } + "##}, + "mmmm", + indoc! {r##" + fn foo() -> &'static str { #["|]#(hello world)" } + "##}, + ), + // match on siblings nodes + ( + indoc! {r##" + fn foo(bar: Option) -> usize { + match bar { + Some(b#[a|]#r) => bar, + None => 42, + } + } + "##}, + "mmmm", + indoc! {r##" + fn foo(bar: Option) -> usize { + match bar { + Some#[(|]#bar) => bar, + None => 42, + } + } + "##}, + ), + // gracefully handle multiple sibling brackets (usally for errors/incomplete syntax trees) + // in the past we selected the first > instead of the second > here + ( + indoc! {r##" + fn foo() { + foo::> + } + "##}, + "mm", + indoc! {r##" + fn foo() { + foo::#[>|]# + } + "##}, + ), + ]; + + let python_tests = vec![ + // python quotes have a slightly more complex syntax tree + // that tirggerd a bug in an old implementation so we test + // them here + ( + indoc! {r##" + foo_python = "mm does not#[ |]#work on this string" + "##}, + "mm", + indoc! {r##" + foo_python = "mm does not work on this string#["|]# + "##}, + ), + ( + indoc! {r##" + foo_python = "mm does not#[ |]#work on this string" + "##}, + "mmmm", + indoc! {r##" + foo_python = #["|]#mm does not work on this string" + "##}, + ), + ]; + + for test in rust_tests { + println!("{test:?}"); + test_with_config(AppBuilder::new().with_file("foo.rs", None), test).await?; + } + for test in python_tests { + println!("{test:?}"); + test_with_config(AppBuilder::new().with_file("foo.py", None), test).await?; + } + + Ok(()) +} From 4de8bd3490107a4063d36e72f1c01be82128b06c Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Mon, 6 May 2024 17:04:43 +0200 Subject: [PATCH 2/2] Update helix-term/tests/test/commands/movement.rs Co-authored-by: Michael Davis --- helix-term/tests/test/commands/movement.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helix-term/tests/test/commands/movement.rs b/helix-term/tests/test/commands/movement.rs index 30673ee3d36a..ed6984288ec4 100644 --- a/helix-term/tests/test/commands/movement.rs +++ b/helix-term/tests/test/commands/movement.rs @@ -899,7 +899,7 @@ async fn match_bracket() -> anyhow::Result<()> { let python_tests = vec![ // python quotes have a slightly more complex syntax tree - // that tirggerd a bug in an old implementation so we test + // that triggerd a bug in an old implementation so we test // them here ( indoc! {r##"