Skip to content

Commit

Permalink
Add a TreeCursor type that travels over injection layers
Browse files Browse the repository at this point in the history
This uses the layer parentage information from the parent commit to
traverse the layers. It's a similar API to `tree_sitter:TreeCursor`
but internally it does not use a `tree_sitter::TreeCursor` currently
because that interface is behaving very unexpectedly. Using the
`next_sibling`/`prev_sibling`/`parent` API on `tree_sitter::Node`
reflects the previous code's behavior so this should result in no
surprising changes.
  • Loading branch information
the-mikedavis committed Mar 19, 2024
1 parent c14af06 commit 1459f57
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 3 deletions.
16 changes: 13 additions & 3 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mod tree_cursor;

use crate::{
auto_pairs::AutoPairs,
chars::char_is_line_ending,
Expand Down Expand Up @@ -32,6 +34,8 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize};

use helix_loader::grammar::{get_language, load_runtime_file};

pub use tree_cursor::TreeCursor;

fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
where
D: serde::Deserializer<'de>,
Expand Down Expand Up @@ -1495,6 +1499,12 @@ impl Syntax {
.descendant_for_byte_range(start, end)
}

pub fn walk(&self) -> TreeCursor<'_> {
// data structure to find the smallest range that contains a point
// when some of the ranges in the structure can overlap.
TreeCursor::new(&self.layers, self.root)
}

// Commenting
// comment_strings_for_pos
// is_commented
Expand Down Expand Up @@ -1723,7 +1733,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use tree_sitter::{
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
QueryMatch, Range, TextProvider, Tree, TreeCursor,
QueryMatch, Range, TextProvider, Tree,
};

const CANCELLATION_CHECK_INTERVAL: usize = 100;
Expand Down Expand Up @@ -2657,7 +2667,7 @@ pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result

fn pretty_print_tree_impl<W: fmt::Write>(
fmt: &mut W,
cursor: &mut TreeCursor,
cursor: &mut tree_sitter::TreeCursor,
depth: usize,
) -> fmt::Result {
let node = cursor.node();
Expand Down Expand Up @@ -2967,7 +2977,7 @@ mod test {
// rule but `name` and `body` belong to an unnamed helper `_method_rest`.
// This can cause a bug with a pretty-printing implementation that
// uses `Node::field_name_for_child` to determine field names but is
// fixed when using `TreeCursor::field_name`.
// fixed when using `tree_sitter::TreeCursor::field_name`.
let source = "def self.method_name
true
end";
Expand Down
160 changes: 160 additions & 0 deletions helix-core/src/syntax/tree_cursor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
use std::{cmp::Reverse, ops::Range};

use super::{LanguageLayer, LayerId};

use slotmap::HopSlotMap;
use tree_sitter::Node;

/// The byte range of an injection layer.
///
/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
/// contains a point with maximum depth.
#[derive(Debug)]
struct InjectionRange {
start: usize,
end: usize,
layer_id: LayerId,
depth: u32,
}

pub struct TreeCursor<'a> {
layers: &'a HopSlotMap<LayerId, LanguageLayer>,
root: LayerId,
current: LayerId,
injection_ranges: Vec<InjectionRange>,
// TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
// that returns very surprising results in testing.
cursor: Node<'a>,
}

impl<'a> TreeCursor<'a> {
pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
let mut injection_ranges = Vec::new();

for (layer_id, layer) in layers.iter() {
// Skip the root layer
if layer.parent.is_none() {
continue;
}
for byte_range in layer.ranges.iter() {
let range = InjectionRange {
start: byte_range.start_byte,
end: byte_range.end_byte,
layer_id,
depth: layer.depth,
};
injection_ranges.push(range);
}
}

injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));

let cursor = layers[root].tree().root_node();

Self {
layers,
root,
current: root,
injection_ranges,
cursor,
}
}

pub fn node(&self) -> Node<'a> {
self.cursor
}

pub fn goto_parent(&mut self) -> bool {
if let Some(parent) = self.node().parent() {
self.cursor = parent;
return true;
}

// If we are already on the root layer, we cannot ascend.
if self.current == self.root {
return false;
}

// Ascend to the parent layer.
let range = self.node().byte_range();
let parent_id = self.layers[self.current]
.parent
.expect("non-root layers have a parent");
self.current = parent_id;
let root = self.layers[self.current].tree().root_node();
self.cursor = root
.descendant_for_byte_range(range.start, range.end)
.unwrap_or(root);

true
}

/// Finds the injection layer that has exactly the same range as the given `range`.
fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
let start_idx = self
.injection_ranges
.partition_point(|range| range.end < search_range.end);

self.injection_ranges[start_idx..]
.iter()
.take_while(|range| range.end == search_range.end)
.find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
}

pub fn goto_first_child(&mut self) -> bool {
// Check if the current node's range is an exact injection layer range.
if let Some(layer_id) = self
.layer_id_of_byte_range(self.node().byte_range())
.filter(|&layer_id| layer_id != self.current)
{
// Switch to the child layer.
self.current = layer_id;
self.cursor = self.layers[self.current].tree().root_node();
true
} else if let Some(child) = self.cursor.child(0) {
// Otherwise descend in the current tree.
self.cursor = child;
true
} else {
false
}
}

pub fn goto_next_sibling(&mut self) -> bool {
if let Some(sibling) = self.cursor.next_sibling() {
self.cursor = sibling;
true
} else {
false
}
}

pub fn goto_prev_sibling(&mut self) -> bool {
if let Some(sibling) = self.cursor.prev_sibling() {
self.cursor = sibling;
true
} else {
false
}
}

/// Finds the injection layer that contains the given start-end range.
fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
let start_idx = self
.injection_ranges
.partition_point(|range| range.end < end);

self.injection_ranges[start_idx..]
.iter()
.take_while(|range| range.start < end)
.find_map(|range| (range.start <= start).then_some(range.layer_id))
.unwrap_or(self.root)
}

pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
self.current = self.layer_id_containing_byte_range(start, end);
let root = self.layers[self.current].tree().root_node();
self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
}
}

0 comments on commit 1459f57

Please sign in to comment.