From e1fcc7f69e7b95aeac8cbbde3719a1e6b9dafeba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 9 May 2023 15:38:09 +0200 Subject: [PATCH] feat: `gix attributes validate` to validate attributes and ignore against `git` as baseline. Use this command to test the entire index for ignored paths and their attributes and use `git check-attr` and `git check-ignore` to validate that `git` agrees. Collect all mismatches and print them. --- gitoxide-core/src/pack/receive.rs | 3 +- gitoxide-core/src/repository/attributes.rs | 66 ---- .../src/repository/attributes/mod.rs | 5 + .../src/repository/attributes/query.rs | 69 ++++ .../attributes/validate_baseline.rs | 366 ++++++++++++++++++ src/plumbing/main.rs | 27 ++ src/plumbing/options/mod.rs | 10 + 7 files changed, 479 insertions(+), 67 deletions(-) delete mode 100644 gitoxide-core/src/repository/attributes.rs create mode 100644 gitoxide-core/src/repository/attributes/mod.rs create mode 100644 gitoxide-core/src/repository/attributes/query.rs create mode 100644 gitoxide-core/src/repository/attributes/validate_baseline.rs diff --git a/gitoxide-core/src/pack/receive.rs b/gitoxide-core/src/pack/receive.rs index 4caa4faa92d..32b273b90e6 100644 --- a/gitoxide-core/src/pack/receive.rs +++ b/gitoxide-core/src/pack/receive.rs @@ -221,7 +221,7 @@ mod async_io { &mut self.ctx, futures_lite::io::BlockOn::new(input), progress, - &refs, + refs, ) } } @@ -241,6 +241,7 @@ mod async_io { { let transport = net::connect( url, + #[allow(clippy::needless_update)] gix::protocol::transport::client::connect::Options { version: protocol.unwrap_or_default().into(), ..Default::default() diff --git a/gitoxide-core/src/repository/attributes.rs b/gitoxide-core/src/repository/attributes.rs deleted file mode 100644 index d37e2ea254a..00000000000 --- a/gitoxide-core/src/repository/attributes.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::io; - -use anyhow::bail; -use gix::prelude::FindExt; - -use crate::OutputFormat; - -pub mod query { - use 
crate::OutputFormat; - - pub struct Options { - pub format: OutputFormat, - pub statistics: bool, - } -} - -pub fn query( - repo: gix::Repository, - pathspecs: impl Iterator, - mut out: impl io::Write, - mut err: impl io::Write, - query::Options { format, statistics }: query::Options, -) -> anyhow::Result<()> { - if format != OutputFormat::Human { - bail!("JSON output isn't implemented yet"); - } - - let index = repo.index()?; - let mut cache = repo.attributes( - &index, - gix::worktree::cache::state::attributes::Source::WorktreeThenIdMapping, - gix::worktree::cache::state::ignore::Source::IdMapping, - None, - )?; - - let prefix = repo.prefix().expect("worktree - we have an index by now")?; - let mut matches = cache.attribute_matches(); - - for mut spec in pathspecs { - for path in spec.apply_prefix(&prefix).items() { - let is_dir = gix::path::from_bstr(path).metadata().ok().map(|m| m.is_dir()); - let entry = cache.at_entry(path, is_dir, |oid, buf| repo.objects.find_blob(oid, buf))?; - - if !entry.matching_attributes(&mut matches) { - continue; - } - for m in matches.iter() { - writeln!( - out, - "{}:{}:{}\t{}\t{}", - m.location.source.map(|p| p.to_string_lossy()).unwrap_or_default(), - m.location.sequence_number, - m.pattern, - path, - m.assignment - )?; - } - } - } - - if let Some(stats) = statistics.then(|| cache.take_statistics()) { - out.flush()?; - writeln!(err, "{:#?}", stats).ok(); - } - Ok(()) -} diff --git a/gitoxide-core/src/repository/attributes/mod.rs b/gitoxide-core/src/repository/attributes/mod.rs new file mode 100644 index 00000000000..ad49bf3c3ca --- /dev/null +++ b/gitoxide-core/src/repository/attributes/mod.rs @@ -0,0 +1,5 @@ +pub mod query; +pub use query::function::query; + +pub mod validate_baseline; +pub use validate_baseline::function::validate_baseline; diff --git a/gitoxide-core/src/repository/attributes/query.rs b/gitoxide-core/src/repository/attributes/query.rs new file mode 100644 index 00000000000..3e170117065 --- /dev/null +++ 
b/gitoxide-core/src/repository/attributes/query.rs @@ -0,0 +1,69 @@ +use crate::OutputFormat; + +pub struct Options { + pub format: OutputFormat, + pub statistics: bool, +} + +pub(crate) mod function { + use crate::repository::attributes::query::{attributes_cache, Options}; + use crate::OutputFormat; + use std::io; + + use anyhow::bail; + use gix::prelude::FindExt; + + pub fn query( + repo: gix::Repository, + pathspecs: impl Iterator, + mut out: impl io::Write, + mut err: impl io::Write, + Options { format, statistics }: Options, + ) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("JSON output isn't implemented yet"); + } + + let mut cache = attributes_cache(&repo)?; + let prefix = repo.prefix().expect("worktree - we have an index by now")?; + let mut matches = cache.attribute_matches(); + + for mut spec in pathspecs { + for path in spec.apply_prefix(&prefix).items() { + let is_dir = gix::path::from_bstr(path).metadata().ok().map(|m| m.is_dir()); + let entry = cache.at_entry(path, is_dir, |oid, buf| repo.objects.find_blob(oid, buf))?; + + if !entry.matching_attributes(&mut matches) { + continue; + } + for m in matches.iter() { + writeln!( + out, + "{}:{}:{}\t{}\t{}", + m.location.source.map(|p| p.to_string_lossy()).unwrap_or_default(), + m.location.sequence_number, + m.pattern, + path, + m.assignment + )?; + } + } + } + + if let Some(stats) = statistics.then(|| cache.take_statistics()) { + out.flush()?; + writeln!(err, "{:#?}", stats).ok(); + } + Ok(()) + } +} + +pub(crate) fn attributes_cache(repo: &gix::Repository) -> anyhow::Result { + let index = repo.index()?; + Ok(repo.attributes( + &index, + gix::worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix::worktree::cache::state::ignore::Source::IdMapping, + None, + )?) 
+} diff --git a/gitoxide-core/src/repository/attributes/validate_baseline.rs b/gitoxide-core/src/repository/attributes/validate_baseline.rs new file mode 100644 index 00000000000..34b3549ad54 --- /dev/null +++ b/gitoxide-core/src/repository/attributes/validate_baseline.rs @@ -0,0 +1,366 @@ +use crate::OutputFormat; + +pub struct Options { + pub format: OutputFormat, + pub statistics: bool, + pub ignore: bool, +} + +pub(crate) mod function { + use std::collections::BTreeSet; + use std::io; + use std::io::{BufRead, Write}; + use std::iter::Peekable; + use std::ops::Sub; + use std::path::PathBuf; + use std::sync::atomic::Ordering; + + use anyhow::{anyhow, bail}; + use gix::odb::FindExt; + use gix::Progress; + + use crate::repository::attributes::query::attributes_cache; + use crate::repository::attributes::validate_baseline::Options; + use crate::OutputFormat; + + pub fn validate_baseline( + repo: gix::Repository, + pathspecs: Option + Send + 'static>, + mut progress: impl Progress + 'static, + mut out: impl io::Write, + mut err: impl io::Write, + Options { + format, + statistics, + ignore, + }: Options, + ) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("JSON output isn't implemented yet"); + } + + let mut num_entries = None; + let pathspecs = pathspecs + .map(|i| anyhow::Result::Ok(Box::new(i) as Box + Send + 'static>)) + .unwrap_or_else({ + let repo = repo.clone(); + let num_entries = &mut num_entries; + move || -> anyhow::Result<_> { + let index = repo.open_index()?; + let (entries, path_backing) = index.into_parts().0.into_entries(); + *num_entries = Some(entries.len()); + Ok(Box::new(entries.into_iter().map(move |e| { + gix::path::Spec::from_bytes(e.path_in(&path_backing)).expect("each entry path is a valid spec") + }))) + } + })?; + + let work_dir = repo + .work_dir() + .map(ToOwned::to_owned) + .ok_or_else(|| anyhow!("repository at {:?} must have a worktree checkout", repo.path()))?; + let (tx_base, rx_base) = 
std::sync::mpsc::channel::<(String, Baseline)>(); + let feed_attrs = { + let (tx, rx) = std::sync::mpsc::sync_channel::(1); + std::thread::spawn({ + let path = work_dir.clone(); + let tx_base = tx_base.clone(); + let mut progress = progress.add_child("attributes"); + move || -> anyhow::Result<()> { + let mut child = std::process::Command::new(GIT_NAME) + .args(["check-attr", "--stdin", "-a"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .current_dir(path) + .spawn()?; + + std::thread::spawn({ + let mut stdin = child.stdin.take().expect("we configured it"); + move || -> anyhow::Result<()> { + progress.init(num_entries, gix::progress::count("paths")); + let start = std::time::Instant::now(); + for spec in rx { + progress.inc(); + for path in spec.items() { + stdin.write_all(path.as_ref())?; + stdin.write_all(b"\n")?; + } + } + progress.show_throughput(start); + Ok(()) + } + }); + + let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it")); + let mut lines = stdout.lines().filter_map(Result::ok).peekable(); + while let Some(baseline) = parse_attributes(&mut lines) { + if tx_base.send(baseline).is_err() { + child.kill().ok(); + break; + } + } + + Ok(()) + } + }); + tx + }; + let feed_excludes = ignore.then(|| { + let (tx, rx) = std::sync::mpsc::sync_channel::(1); + std::thread::spawn({ + let path = work_dir.clone(); + let tx_base = tx_base.clone(); + let mut progress = progress.add_child("excludes"); + move || -> anyhow::Result<()> { + let mut child = std::process::Command::new(GIT_NAME) + .args(["check-ignore", "--stdin", "-nv", "--no-index"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .current_dir(path) + .spawn()?; + + std::thread::spawn({ + let mut stdin = child.stdin.take().expect("we configured it"); + move || -> anyhow::Result<()> { + progress.init(num_entries, 
gix::progress::count("paths")); + let start = std::time::Instant::now(); + for spec in rx { + progress.inc(); + for path in spec.items() { + stdin.write_all(path.as_ref())?; + stdin.write_all(b"\n")?; + } + } + progress.show_throughput(start); + Ok(()) + } + }); + + let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it")); + for line in stdout.lines() { + let line = line?; + if let Some(baseline) = parse_exclude(&line) { + if tx_base.send(baseline).is_err() { + child.kill().ok(); + break; + } + } else { + eprintln!("Failed to parse line {line:?} - ignored"); + } + } + + Ok(()) + } + }); + tx + }); + drop(tx_base); + + std::thread::spawn(move || { + for spec in pathspecs { + if feed_attrs.send(spec.clone()).is_err() { + break; + } + if let Some(ch) = feed_excludes.as_ref() { + if ch.send(spec).is_err() { + break; + } + } + } + }); + + let mut cache = attributes_cache(&repo)?; + let mut matches = cache.attribute_matches(); + let mut progress = progress.add_child("validate"); + let mut mismatches = Vec::new(); + let start = std::time::Instant::now(); + progress.init( + num_entries.map(|n| n + if ignore { n } else { 0 }), + gix::progress::count("paths"), + ); + + for (rela_path, baseline) in rx_base { + let entry = cache.at_entry(rela_path.as_str(), Some(false), |oid, buf| { + repo.objects.find_blob(oid, buf) + })?; + match baseline { + Baseline::Attribute { assignments: expected } => { + entry.matching_attributes(&mut matches); + let fast_path_mismatch = matches + .iter() + .map(|m| m.assignment) + .zip(expected.iter().map(|a| a.as_ref())) + .any(|(a, b)| a != b); + if fast_path_mismatch { + let actual_set = BTreeSet::from_iter(matches.iter().map(|m| m.assignment)); + let expected_set = BTreeSet::from_iter(expected.iter().map(|a| a.as_ref())); + let too_few_or_too_many = + !(expected_set.sub(&actual_set).is_empty() && actual_set.sub(&expected_set).is_empty()); + if too_few_or_too_many { + mismatches.push(( + rela_path, + 
Mismatch::Attributes { + actual: matches.iter().map(|m| m.assignment.to_owned()).collect(), + expected, + }, + )) + } + } + } + Baseline::Exclude { location } => { + let match_ = entry.matching_exclude_pattern(); + if match_.is_some() != location.is_some() { + mismatches.push(( + rela_path, + Mismatch::Exclude { + actual: match_.map(Into::into), + expected: location, + }, + )) + } + } + } + progress.inc(); + } + + if let Some(stats) = statistics.then(|| cache.take_statistics()) { + out.flush()?; + writeln!(err, "{:#?}", stats).ok(); + } + progress.show_throughput(start); + + if mismatches.is_empty() { + Ok(()) + } else { + for (rela_path, mm) in &mismatches { + writeln!(err, "{rela_path}: {mm:#?}").ok(); + } + bail!( + "{}: Validation failed with {} mismatches out of {}", + gix::path::realpath(&work_dir).unwrap_or(work_dir).display(), + mismatches.len(), + progress + .counter() + .map(|a| a.load(Ordering::Relaxed)) + .unwrap_or_default() + ); + } + } + + static GIT_NAME: &str = if cfg!(windows) { "git.exe" } else { "git" }; + + enum Baseline { + Attribute { assignments: Vec }, + Exclude { location: Option }, + } + + #[derive(Debug)] + pub struct ExcludeLocation { + pub line: usize, + pub rela_source_file: String, + pub pattern: String, + } + + #[derive(Debug)] + pub enum Mismatch { + Attributes { + actual: Vec, + expected: Vec, + }, + Exclude { + actual: Option, + expected: Option, + }, + } + + #[derive(Debug)] + pub struct ExcludeMatch { + pub pattern: gix::glob::Pattern, + pub source: Option, + pub sequence_number: usize, + } + + impl From> for ExcludeMatch { + fn from(value: gix::ignore::search::Match<'_, ()>) -> Self { + ExcludeMatch { + pattern: value.pattern.clone(), + source: value.source.map(ToOwned::to_owned), + sequence_number: value.sequence_number, + } + } + } + + fn parse_exclude(line: &str) -> Option<(String, Baseline)> { + let (left, value) = line.split_at(line.find(|c| c == '\t')?); + let value = &value[1..]; + + let location = if left == "::" { + 
None + } else { + let mut tokens = left.split(|b| b == ':'); + let source = tokens.next()?; + let line_number: usize = tokens.next()?.parse().ok()?; + let pattern = tokens.next()?; + Some(ExcludeLocation { + line: line_number, + rela_source_file: source.into(), + pattern: pattern.into(), + }) + }; + Some((value.to_string(), Baseline::Exclude { location })) + } + + fn parse_attributes(lines: &mut Peekable>) -> Option<(String, Baseline)> { + let first = lines.next()?; + let mut out = Vec::new(); + let (path, assignment) = parse_attribute_line(&first)?; + + let current = path.to_owned(); + out.push(assignment.to_owned()); + loop { + let next_line = match lines.peek() { + None => break, + Some(l) => l, + }; + let (next_path, next_assignment) = parse_attribute_line(next_line)?; + if next_path != current { + return Some((current, Baseline::Attribute { assignments: out })); + } else { + out.push(next_assignment.to_owned()); + lines.next(); + } + } + Some((current, Baseline::Attribute { assignments: out })) + } + + fn parse_attribute_line(line: &str) -> Option<(&str, gix::attrs::AssignmentRef<'_>)> { + use gix::attrs::StateRef; + use gix::bstr::ByteSlice; + + let mut prev = None; + let mut tokens = line.splitn(3, |b| { + let is_match = b == ' ' && prev.take() == Some(':'); + prev = Some(b); + is_match + }); + if let Some(((mut path, attr), info)) = tokens.next().zip(tokens.next()).zip(tokens.next()) { + let state = match info { + "set" => StateRef::Set, + "unset" => StateRef::Unset, + "unspecified" => StateRef::Unspecified, + _ => StateRef::from_bytes(info.as_bytes()), + }; + path = path.trim_end_matches(|b| b == ':'); + let attr = attr.trim_end_matches(|b| b == ':'); + let assignment = gix::attrs::AssignmentRef { + name: gix::attrs::NameRef::try_from(attr.as_bytes().as_bstr()).ok()?, + state, + }; + Some((path, assignment)) + } else { + None + } + } +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 6f8fc29c990..c6af57c7d61 100644 --- a/src/plumbing/main.rs 
+++ b/src/plumbing/main.rs @@ -859,6 +859,33 @@ pub fn main() -> Result<()> { ) }, ), + attributes::Subcommands::ValidateBaseline { statistics, no_ignore } => prepare_and_run( + "attributes-validate-baseline", + auto_verbose, + progress, + progress_keep_open, + None, + move |progress, out, err| { + use gix::bstr::ByteSlice; + core::repository::attributes::validate_baseline( + repository(Mode::StrictWithGitInstallConfig)?, + stdin_or_bail().ok().map(|stdin| { + stdin + .byte_lines() + .filter_map(Result::ok) + .filter_map(|line| gix::path::Spec::from_bytes(line.as_bstr())) + }), + progress, + out, + err, + core::repository::attributes::validate_baseline::Options { + format, + statistics, + ignore: !no_ignore, + }, + ) + }, + ), }, Subcommands::Exclude(cmd) => match cmd { exclude::Subcommands::Query { diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 69f818db54b..0e3b728d8fa 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -448,6 +448,16 @@ pub mod attributes { #[derive(Debug, clap::Subcommand)] pub enum Subcommands { + /// Run `git check-attr` and `git check-ignore` on all files of the index or all files passed via stdin and validate that + /// we get the same outcome when computing attributes. + ValidateBaseline { + /// Print various statistics to stderr + #[clap(long, short = 's')] + statistics: bool, + /// Don't validate excludes as obtaining them with `check-ignore` can be very slow. + #[clap(long)] + no_ignore: bool, + }, /// List all attributes of the given path-specs and display the result similar to `git check-attr`. Query { /// Print various statistics to stderr