Skip to content

Commit

Permalink
switch to asynchronus diffing with similar
Browse files Browse the repository at this point in the history
  • Loading branch information
pascalkuthe committed Sep 19, 2022
1 parent 47d3a80 commit 402a59a
Show file tree
Hide file tree
Showing 8 changed files with 375 additions and 65 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions helix-vcs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ homepage = "https://helix-editor.com"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

git-repository = {version = "0.23.1", default-features = false}
tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "parking_lot", "macros"] }
similar = "2.2"
ropey = { version = "1.5", default-features = false, features = ["simd"] }
arc-swap = "1"


[dev-dependencies]
tempfile = "3.3"
180 changes: 180 additions & 0 deletions helix-vcs/src/differ.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use std::mem::take;
use std::ops::Deref;
use std::sync::Arc;

use arc_swap::ArcSwap;
use ropey::{Rope, RopeSlice};
use similar::{capture_diff_slices_deadline, Algorithm, DiffTag};
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
use tokio::task::JoinHandle;
use tokio::time::{timeout_at, Duration, Instant};

use crate::rope_line_cache::RopeLineCache;
use crate::{LineDiff, LineDiffs};

#[cfg(test)]
mod test;

/// Handle to a background diff worker.
///
/// Cloning the handle is cheap: all clones share the same event channel and
/// the same published line diff.
#[derive(Clone, Debug)]
pub struct Differ {
/// Sending half of the channel used to forward document / diff base updates to the worker.
channel: UnboundedSender<Event>,
/// Most recently published line diff; the worker replaces it atomically after each diff run.
line_diffs: Arc<ArcSwap<LineDiffs>>,
}

impl Differ {
    /// Starts a background worker that diffs `doc` against `diff_base` and
    /// returns the handle used to feed it updates and read its results.
    ///
    /// The worker is started with `tokio::spawn`, so this has to be called
    /// from within a Tokio runtime.
    pub fn new(diff_base: Rope, doc: Rope) -> Differ {
        // The join handle is only interesting for tests; dropping it detaches the task.
        let (differ, _worker) = Differ::new_with_handle(diff_base, doc);
        differ
    }

    /// Same as [`Differ::new`] but additionally hands out the `JoinHandle`
    /// of the spawned worker so callers can wait for it to finish.
    fn new_with_handle(diff_base: Rope, doc: Rope) -> (Differ, JoinHandle<()>) {
        let (sender, receiver) = unbounded_channel();
        let shared_diffs: Arc<ArcSwap<LineDiffs>> = Arc::default();

        let differ = Differ {
            channel: sender,
            line_diffs: Arc::clone(&shared_diffs),
        };
        let worker = DiffWorker {
            channel: receiver,
            line_diffs: shared_diffs,
            new_line_diffs: LineDiffs::default(),
        };

        let handle = tokio::spawn(worker.run(diff_base, doc));
        (differ, handle)
    }

    /// Returns a guard giving read access to the most recently published line diffs.
    pub fn get_line_diffs(&self) -> impl Deref<Target = impl Deref<Target = LineDiffs>> {
        self.line_diffs.load()
    }

    /// Tells the worker that the document text changed to `doc`.
    ///
    /// Returns `true` if the event was queued and `false` if sending failed.
    pub fn update_document(&self, doc: Rope) -> bool {
        let sent = self.channel.send(Event::UpdateDocument(doc));
        sent.is_ok()
    }

    /// Tells the worker that the text to diff against changed to `diff_base`.
    ///
    /// Returns `true` if the event was queued and `false` if sending failed.
    pub fn update_diff_base(&self, diff_base: Rope) -> bool {
        let sent = self.channel.send(Event::UpdateDiffBase(diff_base));
        sent.is_ok()
    }
}

// TODO configuration
/// Upper bound, in milliseconds, on how long a burst of events is accumulated before diffing.
const DIFF_MAX_DEBOUNCE: u64 = 200;
/// Time, in milliseconds, to wait for a follow-up event before a burst is considered finished.
const DIFF_DEBOUNCE: u64 = 10;
/// Deadline, in milliseconds, handed to the diff algorithm for a single diff run.
const DIFF_TIMEOUT: u64 = 200;
/// Maximum number of lines either side may have; larger inputs are not diffed at all.
const MAX_DIFF_LEN: usize = 40000;
/// Diff algorithm passed to `similar`.
const ALGORITHM: Algorithm = Algorithm::Myers;

/// State owned by the background task that performs the actual diffing.
struct DiffWorker {
/// Receiving half of the event channel fed by `Differ`.
channel: UnboundedReceiver<Event>,
/// Shared slot the finished diffs are published to (read through `Differ::get_line_diffs`).
line_diffs: Arc<ArcSwap<LineDiffs>>,
/// Scratch diff that is filled by `perform_diff` and then published by `apply_line_diff`.
new_line_diffs: LineDiffs,
}

impl DiffWorker {
async fn run(mut self, diff_base: Rope, doc: Rope) {
let mut diff_base = RopeLineCache::new(diff_base);
let mut doc = RopeLineCache::new(doc);
self.perform_diff(diff_base.lines(), doc.lines());
self.apply_line_diff();
while let Some(event) = self.channel.recv().await {
let mut accumulator = EventAccumulator::new();
accumulator.handle_event(event);
accumulator
.accumualte_debounced_events(&mut self.channel)
.await;

if let Some(new_doc) = accumulator.doc {
doc.update(new_doc)
}
if let Some(new_base) = accumulator.diff_base {
diff_base.update(new_base)
}

self.perform_diff(diff_base.lines(), doc.lines());
self.apply_line_diff();
}
}

/// update the line diff (used by the gutter) by replacing it with `self.new_line_diffs`.
/// `self.new_line_diffs` is always empty after this function runs.
/// To improve performance this function trys to reuse the allocation of the old diff previously stored in `self.line_diffs`
fn apply_line_diff(&mut self) {
let diff_to_apply = take(&mut self.new_line_diffs);
let old_line_diff = self.line_diffs.swap(Arc::new(diff_to_apply));
if let Ok(mut cached_alloc) = Arc::try_unwrap(old_line_diff) {
cached_alloc.clear();
self.new_line_diffs = cached_alloc;
}
}

fn perform_diff(&mut self, diff_base: &[RopeSlice<'_>], doc: &[RopeSlice<'_>]) {
if diff_base.len() > MAX_DIFF_LEN || doc.len() > MAX_DIFF_LEN {
return;
}
// TODO allow configuration algorithm
// TODO configure diff deadline

let diff = capture_diff_slices_deadline(
ALGORITHM,
diff_base,
doc,
Some(std::time::Instant::now() + std::time::Duration::from_millis(DIFF_TIMEOUT)),
);
for op in diff {
let (tag, _, line_range) = op.as_tag_tuple();
let op = match tag {
DiffTag::Insert => LineDiff::Added,
DiffTag::Replace => LineDiff::Modified,
DiffTag::Delete => {
self.add_line_diff(line_range.start, LineDiff::Deleted);
continue;
}
DiffTag::Equal => continue,
};

for line in line_range {
self.add_line_diff(line, op)
}
}
}

fn add_line_diff(&mut self, line: usize, op: LineDiff) {
self.new_line_diffs.insert(line, op);
}
}

/// Collapses a burst of events into at most one new diff base and one new document.
struct EventAccumulator {
/// Latest diff base received during the burst, if any.
diff_base: Option<Rope>,
/// Latest document received during the burst, if any.
doc: Option<Rope>,
}
impl EventAccumulator {
    /// Creates an accumulator that has not seen any event yet.
    fn new() -> EventAccumulator {
        EventAccumulator {
            diff_base: None,
            doc: None,
        }
    }

    /// Remembers the payload of `event`, overwriting an earlier payload of the same kind.
    fn handle_event(&mut self, event: Event) {
        match event {
            Event::UpdateDiffBase(new_diff_base) => self.diff_base = Some(new_diff_base),
            Event::UpdateDocument(new_doc) => self.doc = Some(new_doc),
        }
    }

    /// Keeps receiving events from `channel` until no event shows up within
    /// `DIFF_DEBOUNCE` milliseconds, the channel is closed, or a wait runs
    /// into the `DIFF_MAX_DEBOUNCE` milliseconds limit measured from the call.
    async fn accumualte_debounced_events(&mut self, channel: &mut UnboundedReceiver<Event>) {
        let hard_deadline = Instant::now() + Duration::from_millis(DIFF_MAX_DEBOUNCE);
        let quiet_period = Duration::from_millis(DIFF_DEBOUNCE);
        loop {
            // Wait one quiet period, but never past the overall limit.
            let deadline = (Instant::now() + quiet_period).min(hard_deadline);
            match timeout_at(deadline, channel.recv()).await {
                Ok(Some(event)) => self.handle_event(event),
                // Channel closed or the wait timed out: the burst is over.
                Ok(None) | Err(_) => break,
            }
        }
    }
}

/// Messages sent from a `Differ` handle to its worker.
enum Event {
/// The document text changed to the contained rope.
UpdateDocument(Rope),
/// The text to diff against changed to the contained rope.
UpdateDiffBase(Rope),
}
90 changes: 90 additions & 0 deletions helix-vcs/src/differ/test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use ropey::Rope;
use tokio::task::JoinHandle;

use crate::{Differ, LineDiff};

impl Differ {
fn new_test(diff_base: &str, doc: &str) -> (Differ, JoinHandle<()>) {
Differ::new_with_handle(Rope::from_str(diff_base), Rope::from_str(doc))
}
async fn into_diff(self, handle: JoinHandle<()>) -> Vec<(usize, LineDiff)> {
let line_diffs = self.line_diffs;
// dropping th echannel terminates the task
drop(self.channel);
handle.await.unwrap();
let diffs = line_diffs.load();
let mut res: Vec<_> = diffs.iter().map(|(&line, &op)| (line, op)).collect();
res.sort_unstable_by_key(|&(line, _)| line);
res
}
}

/// A line appended after unchanged content is reported as added.
#[tokio::test]
async fn append_line() {
    let (differ, worker) = Differ::new_test("foo\n", "foo\nbar\n");
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Added)];
    assert_eq!(actual, expected);
}

/// A line inserted before unchanged content is reported as added.
#[tokio::test]
async fn prepend_line() {
    let (differ, worker) = Differ::new_test("foo\n", "bar\nfoo\n");
    let actual = differ.into_diff(worker).await;
    let expected = vec![(0, LineDiff::Added)];
    assert_eq!(actual, expected);
}

/// A line whose content changed is reported as modified.
#[tokio::test]
async fn modify() {
    let (differ, worker) = Differ::new_test("foo\nbar\n", "foo bar\nbar\n");
    let actual = differ.into_diff(worker).await;
    let expected = vec![(0, LineDiff::Modified)];
    assert_eq!(actual, expected);
}

/// A removed line is reported as a deletion at the position it was removed from.
#[tokio::test]
async fn delete_line() {
    let (differ, worker) = Differ::new_test("foo\nfoo bar\nbar\n", "foo\nbar\n");
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Deleted)];
    assert_eq!(actual, expected);
}

/// A deletion and a later modification in the same document are both reported.
#[tokio::test]
async fn delete_line_and_modify() {
    let (differ, worker) = Differ::new_test("foo\nbar\ntest\nfoo", "foo\ntest\nfoo bar");
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Deleted), (2, LineDiff::Modified)];
    assert_eq!(actual, expected);
}

/// Inserting a line between two similar-looking lines marks only the new line as added.
#[tokio::test]
async fn add_use() {
    let base = "use ropey::Rope;\nuse tokio::task::JoinHandle;\n";
    let doc = "use ropey::Rope;\nuse ropey::RopeSlice;\nuse tokio::task::JoinHandle;\n";
    let (differ, worker) = Differ::new_test(base, doc);
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Added)];
    assert_eq!(actual, expected);
}

/// Starting from identical texts, an updated document is diffed against the original base.
#[tokio::test]
async fn update_document() {
    let unchanged = "foo\nbar\ntest\nfoo";
    let (differ, worker) = Differ::new_test(unchanged, unchanged);
    differ.update_document(Rope::from_str("foo\ntest\nfoo bar"));
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Deleted), (2, LineDiff::Modified)];
    assert_eq!(actual, expected);
}

/// Starting from identical texts, the document is diffed against an updated diff base.
#[tokio::test]
async fn update_base() {
    let unchanged = "foo\ntest\nfoo bar";
    let (differ, worker) = Differ::new_test(unchanged, unchanged);
    differ.update_diff_base(Rope::from_str("foo\nbar\ntest\nfoo"));
    let actual = differ.into_diff(worker).await;
    let expected = vec![(1, LineDiff::Deleted), (2, LineDiff::Modified)];
    assert_eq!(actual, expected);
}
5 changes: 4 additions & 1 deletion helix-vcs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
use std::{collections::HashMap, path::Path};

pub use differ::Differ;
pub use git::Git;

mod differ;
mod git;
mod rope_line_cache;

// TODO: Move to helix_core once we have a generic diff mode
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LineDiff {
Added,
Deleted,
Expand Down
71 changes: 71 additions & 0 deletions helix-vcs/src/rope_line_cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
//! This module encapsulates a tiny bit of unsafe code that
//! makes diffing significantly faster and more ergonomic to implement.
//! This code is necessary because diffing requires quick random
//! access to the lines of the text that is being diffed.
//!
//! Therefore it is best to collect the `Rope::lines` iterator into a vec
//! first because access to the vec is `O(1)` where `Rope::line` is `O(log N)`.
//! However this process can allocate a (potentially quite large) vector.
//!
//! To avoid reallocation for every diff, the vector is reused.
//! However the `RopeSlice` references the original rope and therefore forms a self-referential data structure.
//! A transmute is used to change the lifetime of the slice to `'static` to circumvent that problem.
use std::mem::transmute;

use ropey::{Rope, RopeSlice};

/// A cache that stores the `lines` of a rope as a vector.
/// It allows safely reusing the allocation of the vec when updating the rope.
pub(crate) struct RopeLineCache {
/// The rope whose lines are cached; it is the backing storage of every slice in `lines`.
rope: Rope,
/// One slice per line of `rope`. The `'static` lifetime is a stand-in produced by a
/// transmute: the slices really borrow from `rope` and must be cleared before it is replaced.
lines: Vec<RopeSlice<'static>>,
}

impl RopeLineCache {
    /// Creates a cache for `rope` with its line table already filled in.
    pub fn new(rope: Rope) -> RopeLineCache {
        let mut cache = RopeLineCache {
            rope,
            lines: Vec::new(),
        };
        cache.update_lines();
        cache
    }

    /// Replaces the cached rope with `rope` and rebuilds the line table,
    /// reusing the allocation of the existing vector.
    pub fn update(&mut self, rope: Rope) {
        // The old slices borrow from the old rope, so they have to go before it is replaced.
        self.lines.clear();
        self.rope = rope;
        self.update_lines()
    }

    /// Fills the (empty) line table with one slice per line of `self.rope`.
    fn update_lines(&mut self) {
        debug_assert_eq!(self.lines.len(), 0);
        // SAFETY: the transmute only changes the lifetime parameter of the slice,
        // which has no effect on its representation.
        // The storage the slices refer to is owned by `self.rope`, so it stays valid
        // for as long as `self.rope` is neither dropped nor replaced.
        // `self.rope` is only replaced in `Self::update`, which clears the slices first.
        // Consumers only get at the slices through `Self::lines`, which ties their
        // lifetime to a borrow of `self`; hence no reference to a slice can exist
        // while `Self::update` (which needs `&mut self`) runs.
        self.lines.extend(
            self.rope
                .lines()
                .map(|line| unsafe { transmute::<RopeSlice<'_>, RopeSlice<'static>>(line) }),
        );
    }

    /// Returns the cached lines; the slices are only valid for the duration of the borrow of `self`.
    pub fn lines(&self) -> &[RopeSlice] {
        self.lines.as_slice()
    }
}
Loading

0 comments on commit 402a59a

Please sign in to comment.