2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ buck-out/
.vscode/


src/.DS_Store
.DS_Store
1 change: 1 addition & 0 deletions BUCK
@@ -73,5 +73,6 @@ rust_library(
"//third-party/rust/crates/tracing/0.1.41:tracing",
"//third-party/rust/crates/uuid/1.18.1:uuid",
"//third-party/rust/crates/zstd-sys/2.0.16+zstd.1.5.7:zstd-sys",
"//third-party/rust/crates/sha2/0.10.9:sha2",
],
)
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -57,6 +57,7 @@ natord = "1.0.9"
tempfile = "3.23.0"
path-absolutize = "3.1.1"
similar = "2.7.0"
sha2 = "0.10.9"


[dev-dependencies]
69 changes: 41 additions & 28 deletions src/diff.rs
@@ -1,4 +1,4 @@
use crate::hash::SHA1;
use crate::hash::ObjectHash;
use path_absolutize::Absolutize;
use similar::{Algorithm, ChangeTag, TextDiff};
use std::collections::{HashMap, HashSet, VecDeque};
@@ -87,13 +87,13 @@ impl Diff {

/// Compute diffs for a set of files, honoring an optional filter and emitting unified diffs.
pub fn diff<F>(
old_blobs: Vec<(PathBuf, SHA1)>,
new_blobs: Vec<(PathBuf, SHA1)>,
old_blobs: Vec<(PathBuf, ObjectHash)>,
new_blobs: Vec<(PathBuf, ObjectHash)>,
filter: Vec<PathBuf>,
read_content: F,
) -> Vec<DiffItem>
where
F: Fn(&PathBuf, &SHA1) -> Vec<u8>,
F: Fn(&PathBuf, &ObjectHash) -> Vec<u8>,
{
let (processed_files, old_blobs_map, new_blobs_map) =
Self::prepare_diff_data(old_blobs, new_blobs, &filter);
@@ -148,13 +148,16 @@ impl Diff {

/// Build maps, union file set, and apply filter/path checks
fn prepare_diff_data(
old_blobs: Vec<(PathBuf, SHA1)>,
new_blobs: Vec<(PathBuf, SHA1)>,
old_blobs: Vec<(PathBuf, ObjectHash)>,
new_blobs: Vec<(PathBuf, ObjectHash)>,
filter: &[PathBuf],
) -> (Vec<PathBuf>, HashMap<PathBuf, SHA1>, HashMap<PathBuf, SHA1>) {
let old_blobs_map: HashMap<PathBuf, SHA1> = old_blobs.into_iter().collect();
let new_blobs_map: HashMap<PathBuf, SHA1> = new_blobs.into_iter().collect();

) -> (
Vec<PathBuf>,
HashMap<PathBuf, ObjectHash>,
HashMap<PathBuf, ObjectHash>,
) {
let old_blobs_map: HashMap<PathBuf, ObjectHash> = old_blobs.into_iter().collect();
let new_blobs_map: HashMap<PathBuf, ObjectHash> = new_blobs.into_iter().collect();
// union set
let union_files: HashSet<PathBuf> = old_blobs_map
.keys()
@@ -174,8 +177,8 @@ impl Diff {
fn should_process(
file: &PathBuf,
filter: &[PathBuf],
old_blobs: &HashMap<PathBuf, SHA1>,
new_blobs: &HashMap<PathBuf, SHA1>,
old_blobs: &HashMap<PathBuf, ObjectHash>,
new_blobs: &HashMap<PathBuf, ObjectHash>,
) -> bool {
if !filter.is_empty()
&& !filter
@@ -194,7 +197,7 @@ impl Diff {
Ok(path_abs.starts_with(parent_abs))
}

fn short_hash(hash: Option<&SHA1>) -> String {
fn short_hash(hash: Option<&ObjectHash>) -> String {
hash.map(|h| {
let hex = h.to_string();
let take = Self::SHORT_HASH_LEN.min(hex.len());
@@ -206,9 +209,9 @@ impl Diff {
/// Format a single file's unified diff string.
pub fn diff_for_file_string(
file: &PathBuf,
old_blobs: &HashMap<PathBuf, SHA1>,
new_blobs: &HashMap<PathBuf, SHA1>,
read_content: &dyn Fn(&PathBuf, &SHA1) -> Vec<u8>,
old_blobs: &HashMap<PathBuf, ObjectHash>,
new_blobs: &HashMap<PathBuf, ObjectHash>,
read_content: &dyn Fn(&PathBuf, &ObjectHash) -> Vec<u8>,
) -> String {
let new_hash = new_blobs.get(file);
let old_hash = old_blobs.get(file);
@@ -221,8 +224,8 @@ impl Diff {
/// Format a single file's unified diff using preloaded bytes to avoid re-reading.
fn diff_for_file_preloaded(
file: &Path,
old_hash: Option<&SHA1>,
new_hash: Option<&SHA1>,
old_hash: Option<&ObjectHash>,
new_hash: Option<&ObjectHash>,
old_bytes: &[u8],
new_bytes: &[u8],
) -> String {
@@ -478,19 +481,23 @@ pub fn compute_diff(old_lines: &[String], new_lines: &[String]) -> Vec<DiffOpera
#[cfg(test)]
mod tests {
use super::{Diff, DiffOperation, compute_diff};
use crate::hash::SHA1;
use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::tempdir;

fn run_diff(logical_path: &str, old_bytes: &[u8], new_bytes: &[u8]) -> (String, SHA1, SHA1) {
fn run_diff(
logical_path: &str,
old_bytes: &[u8],
new_bytes: &[u8],
) -> (String, ObjectHash, ObjectHash) {
let file = PathBuf::from(logical_path);
let old_hash = SHA1::new(old_bytes);
let new_hash = SHA1::new(new_bytes);
let old_hash = ObjectHash::new(old_bytes);
let new_hash = ObjectHash::new(new_bytes);

let mut blob_store: HashMap<SHA1, Vec<u8>> = HashMap::new();
let mut blob_store: HashMap<ObjectHash, Vec<u8>> = HashMap::new();
blob_store.insert(old_hash, old_bytes.to_vec());
blob_store.insert(new_hash, new_bytes.to_vec());

@@ -499,23 +506,24 @@ mod tests {
old_map.insert(file.clone(), old_hash);
new_map.insert(file.clone(), new_hash);

let reader =
|_: &PathBuf, h: &SHA1| -> Vec<u8> { blob_store.get(h).cloned().unwrap_or_default() };
let reader = |_: &PathBuf, h: &ObjectHash| -> Vec<u8> {
blob_store.get(h).cloned().unwrap_or_default()
};

let diff = Diff::diff_for_file_string(&file, &old_map, &new_map, &reader);
(diff, old_hash, new_hash)
}

fn short_hash(hash: &SHA1) -> String {
fn short_hash(hash: &ObjectHash) -> String {
hash.to_string().chars().take(7).collect()
}

fn normalized_git_diff(
logical_path: &str,
old_bytes: &[u8],
new_bytes: &[u8],
old_hash: &SHA1,
new_hash: &SHA1,
old_hash: &ObjectHash,
new_hash: &ObjectHash,
) -> Option<String> {
let temp_dir = tempdir().ok()?;
let old_file = temp_dir.path().join("old.txt");
@@ -564,6 +572,7 @@ mod tests {

#[test]
fn unified_diff_basic_changes() {
let _guard = set_hash_kind_for_test(HashKind::Sha256);
let old = b"a\nb\nc\n" as &[u8];
let new = b"a\nB\nc\nd\n" as &[u8];
let (diff, _, _) = run_diff("foo.txt", old, new);
@@ -580,6 +589,7 @@ mod tests {

#[test]
fn binary_files_detection() {
let _guard = set_hash_kind_for_test(HashKind::Sha256);
let old_bytes = vec![0u8, 159, 146, 150];
let new_bytes = vec![0xFF, 0x00, 0x01];
let (diff, _, _) = run_diff("bin.dat", &old_bytes, &new_bytes);
@@ -588,6 +598,7 @@ mod tests {

#[test]
fn diff_matches_git_for_fixture() {
let _guard = set_hash_kind_for_test(HashKind::Sha256); // use this guard to exercise SHA-1/SHA-256 diffs as well
let base: PathBuf = [env!("CARGO_MANIFEST_DIR"), "tests", "diff"]
.iter()
.collect();
@@ -628,6 +639,7 @@ mod tests {

#[test]
fn diff_matches_git_for_large_change() {
let _guard = set_hash_kind_for_test(HashKind::Sha256);
let old_lines: Vec<String> = (0..5_000).map(|i| format!("line {i}")).collect();
let mut new_lines = old_lines.clone();
for idx in [10, 499, 1_234, 3_210, 4_999] {
@@ -670,6 +682,7 @@ mod tests {

#[test]
fn compute_diff_operations_basic_mapping() {
let _guard = set_hash_kind_for_test(HashKind::Sha256);
let old_lines = vec!["a".to_string(), "b".to_string(), "c".to_string()];
let new_lines = vec![
"a".to_string(),
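Taken together, the changes above swap the concrete `SHA1` type for an `ObjectHash` abstraction, add `sha2` as a dependency, and gate the tests on a `set_hash_kind_for_test(HashKind::Sha256)` guard. The actual `src/hash.rs` changes are not part of this diff, so the following is only a minimal sketch, assuming `ObjectHash` wraps a fixed 32-byte buffer and the RustCrypto `sha1`/`sha2` crates, of how such a dual-kind hash type could look. The `with_kind` constructor is a hypothetical name; the real `ObjectHash::new` presumably reads a process-wide kind toggled by `set_hash_kind_for_test`.

```rust
// Hypothetical sketch only; the real ObjectHash in src/hash.rs may differ.
use sha1::Sha1; // assumed to already be a dependency; only sha2 is visibly added in this PR
use sha2::{Digest, Sha256};
use std::fmt;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum HashKind {
    Sha1,
    Sha256,
}

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct ObjectHash {
    kind: HashKind,
    len: usize,      // 20 for SHA-1, 32 for SHA-256
    bytes: [u8; 32], // digest bytes, zero-padded when kind is SHA-1
}

impl ObjectHash {
    // Hash `data` with an explicitly chosen kind.
    pub fn with_kind(kind: HashKind, data: &[u8]) -> Self {
        let mut bytes = [0u8; 32];
        let (digest, len) = match kind {
            HashKind::Sha1 => (Sha1::digest(data).to_vec(), 20),
            HashKind::Sha256 => (Sha256::digest(data).to_vec(), 32),
        };
        bytes[..len].copy_from_slice(&digest);
        ObjectHash { kind, len, bytes }
    }
}

impl fmt::Display for ObjectHash {
    // Lowercase hex, which is what Diff::short_hash truncates for the diff header.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for b in &self.bytes[..self.len] {
            write!(f, "{b:02x}")?;
        }
        Ok(())
    }
}
```

A fixed-size, `Copy`-able buffer would match how the tests reuse `old_hash`/`new_hash` as `HashMap` keys without cloning.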