From c0094483893554e65421856a74399cdb0e372017 Mon Sep 17 00:00:00 2001
From: Michael Davis
Date: Wed, 10 Jan 2024 15:58:44 -0500
Subject: [PATCH] Add a TreeCursor type that travels over injection layers

This uses the layer parentage information from the parent commit to
traverse the layers. It's a similar API to `tree_sitter::TreeCursor`
but internally it does not use a `tree_sitter::TreeCursor` currently
because that interface is behaving very unexpectedly. Using the
`next_sibling`/`prev_sibling`/`parent` API on `tree_sitter::Node`
reflects the previous code's behavior so this should result in no
surprising changes.
---
 helix-core/src/syntax.rs             |  29 ++++-
 helix-core/src/syntax/tree_cursor.rs | 159 +++++++++++++++++++++++++++
 2 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 helix-core/src/syntax/tree_cursor.rs

diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index d0af412c9dd71..b27d47e0513d0 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,3 +1,5 @@
+mod tree_cursor;
+
 use crate::{
     auto_pairs::AutoPairs,
     chars::char_is_line_ending,
@@ -31,6 +33,8 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize};
 
 use helix_loader::grammar::{get_language, load_runtime_file};
 
+pub use tree_cursor::TreeCursor;
+
 fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
 where
     D: serde::Deserializer<'de>,
@@ -1429,6 +1433,25 @@ impl Syntax {
             .descendant_for_byte_range(start, end)
     }
 
+    /// Creates a [`TreeCursor`] positioned on the root node of the root
+    /// layer. The byte ranges of all non-root layers are handed to the
+    /// cursor so that it can descend into the layers injected there.
+    pub fn walk(&self) -> TreeCursor<'_> {
+        let mut injection_ranges = HashMap::with_capacity(self.layers.len());
+
+        for (layer_id, layer) in &self.layers {
+            // Skip the root layer
+            if layer.parent.is_none() {
+                continue;
+            }
+            for range in layer.ranges.iter() {
+                injection_ranges.insert(range.start_byte..range.end_byte, layer_id);
+            }
+        }
+
+        TreeCursor::new(&self.layers, self.root, injection_ranges)
+    }
+
     // Commenting
     // comment_strings_for_pos
     // is_commented
@@ -1663,7 +1686,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
 use std::{iter, mem, ops, str, usize};
 use tree_sitter::{
     Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
-    QueryMatch, Range, TextProvider, Tree, TreeCursor,
+    QueryMatch, Range, TextProvider, Tree,
 };
 
 const CANCELLATION_CHECK_INTERVAL: usize = 100;
@@ -2592,7 +2615,7 @@ pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result
 
 fn pretty_print_tree_impl<W: fmt::Write>(
     fmt: &mut W,
-    cursor: &mut TreeCursor,
+    cursor: &mut tree_sitter::TreeCursor,
     depth: usize,
 ) -> fmt::Result {
     let node = cursor.node();
@@ -2902,7 +2925,7 @@ mod test {
         // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
         // This can cause a bug with a pretty-printing implementation that
         // uses `Node::field_name_for_child` to determine field names but is
-        // fixed when using `TreeCursor::field_name`.
+        // fixed when using `tree_sitter::TreeCursor::field_name`.
         let source = "def self.method_name
           true
         end";
diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs
new file mode 100644
index 0000000000000..a4ba2e2e683a4
--- /dev/null
+++ b/helix-core/src/syntax/tree_cursor.rs
@@ -0,0 +1,159 @@
+use std::{collections::HashMap, ops::Range};
+
+use super::{LanguageLayer, LayerId};
+
+use slotmap::HopSlotMap;
+use tree_sitter::Node;
+
+/// A cursor for walking the syntax trees of all language layers.
+///
+/// The API is similar to `tree_sitter::TreeCursor`, but this cursor also
+/// crosses injection boundaries: descending from a node whose byte range is
+/// an injection range enters the injected layer, and ascending from the root
+/// node of an injected layer returns to its parent layer.
+pub struct TreeCursor<'a> {
+    /// All language layers the cursor can visit.
+    layers: &'a HopSlotMap<LayerId, LanguageLayer>,
+    /// The root layer. The cursor cannot ascend past its root node.
+    root: LayerId,
+    /// The layer whose tree `cursor` currently points into.
+    current: LayerId,
+    /// Byte ranges of injections, mapped to the layer injected there.
+    injection_ranges: HashMap<Range<usize>, LayerId>,
+    // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
+    // that returns very surprising results in testing.
+    cursor: Node<'a>,
+}
+
+impl<'a> TreeCursor<'a> {
+    /// Creates a cursor positioned on the root node of the `root` layer.
+    ///
+    /// `injection_ranges` maps the byte range of each injection to the layer
+    /// injected there.
+    pub(super) fn new(
+        layers: &'a HopSlotMap<LayerId, LanguageLayer>,
+        root: LayerId,
+        injection_ranges: HashMap<Range<usize>, LayerId>,
+    ) -> Self {
+        let cursor = layers[root].tree().root_node();
+
+        Self {
+            layers,
+            root,
+            current: root,
+            injection_ranges,
+            cursor,
+        }
+    }
+
+    /// Returns the node the cursor is currently positioned on.
+    pub fn node(&self) -> Node<'a> {
+        self.cursor
+    }
+
+    /// Moves the cursor to the parent of the current node.
+    ///
+    /// From the root node of an injected layer, the cursor moves to the
+    /// smallest node of the parent layer spanning the same byte range.
+    /// Returns `false` only at the root node of the root layer.
+    pub fn goto_parent(&mut self) -> bool {
+        if let Some(parent) = self.node().parent() {
+            self.cursor = parent;
+            return true;
+        }
+
+        // If we are already on the root layer, we cannot ascend.
+        if self.current == self.root {
+            return false;
+        }
+
+        // Ascend to the parent layer.
+        let range = self.node().byte_range();
+        let parent_id = self.layers[self.current]
+            .parent
+            .expect("non-root layers have a parent");
+        self.current = parent_id;
+        let root = self.layers[self.current].tree().root_node();
+        self.cursor = root
+            .descendant_for_byte_range(range.start, range.end)
+            .unwrap_or(root);
+
+        true
+    }
+
+    /// Moves the cursor to the first child of the current node.
+    ///
+    /// If the byte range of the current node is exactly the range of an
+    /// injection, the cursor moves to the root node of the injected layer
+    /// instead. Returns `false` if there is nothing to descend into.
+    pub fn goto_first_child(&mut self) -> bool {
+        // Check if the current node's range is an injection layer range.
+        // The layer we are already in is ignored: its root node may span the
+        // whole injection range, and re-entering the layer from there would
+        // reset the cursor to that root on every call.
+        let range = self.node().byte_range();
+        let injected = self
+            .injection_ranges
+            .get(&range)
+            .copied()
+            .filter(|&layer_id| layer_id != self.current);
+
+        if let Some(layer_id) = injected {
+            // Switch to the child layer.
+            self.current = layer_id;
+            self.cursor = self.layers[self.current].tree().root_node();
+            true
+        } else if let Some(child) = self.cursor.child(0) {
+            // Otherwise descend in the current tree.
+            self.cursor = child;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves the cursor to the next sibling of the current node.
+    ///
+    /// Returns `false` if the current node has no next sibling in its tree.
+    pub fn goto_next_sibling(&mut self) -> bool {
+        if let Some(sibling) = self.cursor.next_sibling() {
+            self.cursor = sibling;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves the cursor to the previous sibling of the current node.
+    ///
+    /// Returns `false` if the current node has no previous sibling in its tree.
+    pub fn goto_prev_sibling(&mut self) -> bool {
+        if let Some(sibling) = self.cursor.prev_sibling() {
+            self.cursor = sibling;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves the cursor to the node spanning the byte range `start..end` in
+    /// the deepest layer that contains the range.
+    ///
+    /// The root layer is used if no deeper layer contains the range, and the
+    /// root node of the chosen layer if no descendant spans the range.
+    pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
+        let mut container_id = self.root;
+
+        for (layer_id, layer) in self.layers.iter() {
+            if layer.depth > self.layers[container_id].depth
+                && layer.contains_byte_range(start, end)
+            {
+                container_id = layer_id;
+            }
+        }
+
+        self.current = container_id;
+        let root = self.layers[self.current].tree().root_node();
+        self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
+    }
+}