Skip to content

Commit

Permalink
feat: add the `diff::resource_cache()` low-level utility for rapid in-…
Browse files Browse the repository at this point in the history
…memory diffing of combinations of resources.

We also add `object::tree::diff::Platform::for_each_to_obtain_tree_with_cache()` to pass a resource-cache
for re-use between multiple invocations for significant savings.
  • Loading branch information
Byron committed Dec 2, 2023
1 parent feca5d0 commit 4aea9b0
Show file tree
Hide file tree
Showing 12 changed files with 417 additions and 21 deletions.
98 changes: 91 additions & 7 deletions gix/src/config/cache/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,97 @@ impl Cache {
.copied()
}

/// Assemble one [`gix_diff::blob::Driver`] per named `diff.<name>` section of the resolved configuration.
///
/// Only sections accepted by `filter_config_section` are considered, and sections without a
/// (non-empty) subsection name are skipped. Sections sharing a name contribute to the same driver,
/// with values seen later in iteration order overwriting earlier ones.
///
/// # Errors
///
/// Fails if the `binary` or `algorithm` value of a driver cannot be parsed; how strictly this is
/// enforced depends on `lenient_config`. In lenient mode, an unimplemented algorithm is replaced
/// with `Histogram`.
#[cfg(feature = "blob-diff")]
pub(crate) fn diff_drivers(&self) -> Result<Vec<gix_diff::blob::Driver>, config::diff::drivers::Error> {
    use crate::config::cache::util::ApplyLeniencyDefault;

    let mut drivers = Vec::<gix_diff::blob::Driver>::new();
    let eligible_sections = self
        .resolved
        .sections_by_name("diff")
        .into_iter()
        .flatten()
        .filter(|section| (self.filter_config_section)(section.meta()));

    for section in eligible_sections {
        // Plain `[diff]` sections (or those with an empty name) don't describe a driver.
        let name = match section.header().subsection_name() {
            Some(name) if !name.is_empty() => name,
            _ => continue,
        };

        // Re-use the driver of that name if an earlier section already created it.
        let driver_idx = match drivers.iter().position(|candidate| candidate.name == name) {
            Some(idx) => idx,
            None => {
                drivers.push(gix_diff::blob::Driver {
                    name: name.into(),
                    ..Default::default()
                });
                drivers.len() - 1
            }
        };
        let driver = &mut drivers[driver_idx];

        if let Some(binary) = section.value_implicit("binary") {
            driver.is_binary = config::tree::Diff::DRIVER_BINARY
                .try_into_binary(binary)
                .with_leniency(self.lenient_config)
                .map_err(|source| config::diff::drivers::Error {
                    name: driver.name.clone(),
                    attribute: "binary",
                    source: Box::new(source),
                })?;
        }
        if let Some(command) = section.value(config::tree::Diff::DRIVER_COMMAND.name) {
            driver.command = command.into_owned().into();
        }
        if let Some(textconv) = section.value(config::tree::Diff::DRIVER_TEXTCONV.name) {
            driver.binary_to_text_command = textconv.into_owned().into();
        }
        if let Some(algorithm) = section.value("algorithm") {
            driver.algorithm = config::tree::Diff::DRIVER_ALGORITHM
                .try_into_algorithm(algorithm)
                .or_else(|err| {
                    // Known-but-unimplemented algorithms degrade to a default when lenient.
                    let is_unimplemented = matches!(err, config::diff::algorithm::Error::Unimplemented { .. });
                    if is_unimplemented && self.lenient_config {
                        Ok(gix_diff::blob::Algorithm::Histogram)
                    } else {
                        Err(err)
                    }
                })
                .with_lenient_default(self.lenient_config)
                .map_err(|source| config::diff::drivers::Error {
                    name: driver.name.clone(),
                    attribute: "algorithm",
                    source: Box::new(source),
                })?
                .into();
        }
    }
    Ok(drivers)
}

/// Gather the configuration-derived values needed to build [`gix_diff::blob::pipeline::Options`].
///
/// The large-file threshold is read first, then the filesystem capabilities; the first failure
/// is returned.
#[cfg(feature = "blob-diff")]
pub(crate) fn diff_pipeline_options(
    &self,
) -> Result<gix_diff::blob::pipeline::Options, config::diff::pipeline_options::Error> {
    let large_file_threshold_bytes = self.big_file_threshold()?;
    let fs = self.fs_capabilities()?;
    Ok(gix_diff::blob::pipeline::Options {
        large_file_threshold_bytes,
        fs,
    })
}

/// Return the rewrite-tracking configuration, computing it on first use and serving the
/// stored value from the `diff_renames` cell afterwards.
#[cfg(feature = "blob-diff")]
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::new_rewrites::Error> {
    let rewrites = self
        .diff_renames
        .get_or_try_init(|| crate::diff::new_rewrites(&self.resolved, self.lenient_config))?;
    Ok(*rewrites)
}

/// Return the value of `core.bigFileThreshold` in bytes, or 512 MiB if no value is available.
///
/// Conversion failures go through `with_leniency()`, so whether they surface as an error
/// depends on `lenient_config`.
#[cfg(feature = "blob-diff")]
pub(crate) fn big_file_threshold(&self) -> Result<u64, config::unsigned_integer::Error> {
    /// Used when the key yields no value.
    const DEFAULT_THRESHOLD_BYTES: u64 = 512 * 1024 * 1024;

    let configured = self
        .resolved
        .integer_by_key("core.bigFileThreshold")
        .map(|number| Core::BIG_FILE_THRESHOLD.try_into_u64(number))
        .transpose()
        .with_leniency(self.lenient_config)?;
    Ok(configured.unwrap_or(DEFAULT_THRESHOLD_BYTES))
}

/// Returns a user agent for use with servers.
#[cfg(any(feature = "async-network-client", feature = "blocking-network-client"))]
pub(crate) fn user_agent_tuple(&self) -> (&'static str, Option<Cow<'static, str>>) {
Expand Down Expand Up @@ -92,13 +183,6 @@ impl Cache {
})
}

/// Return the rewrite-tracking configuration, initializing the `diff_renames` cell from the
/// resolved configuration on first use and copying the stored value out afterwards.
// NOTE(review): another definition of `diff_renames` with the same signature appears earlier in
// this listing — presumably this is the pre-move location of the method; confirm only one remains.
#[cfg(feature = "blob-diff")]
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::new_rewrites::Error> {
self.diff_renames
.get_or_try_init(|| crate::diff::new_rewrites(&self.resolved, self.lenient_config))
.copied()
}

/// Returns (file-timeout, pack-refs timeout)
pub(crate) fn lock_timeout(
&self,
Expand Down
30 changes: 30 additions & 0 deletions gix/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,36 @@ pub mod diff {
Unimplemented { name: BString },
}
}

///
pub mod pipeline_options {
/// The error produced when obtaining options needed to fill in [gix_diff::blob::pipeline::Options].
///
/// Both variants are transparent wrappers, so the message shown is the one of the wrapped error.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// A boolean configuration value could not be interpreted.
/// Presumably raised while determining filesystem capabilities — confirm against the caller.
#[error(transparent)]
FilesystemCapabilities(#[from] crate::config::boolean::Error),
/// An unsigned-integer configuration value could not be interpreted,
/// as is the case for a malformed big-file threshold.
#[error(transparent)]
BigFileThreshold(#[from] crate::config::unsigned_integer::Error),
}
}

///
pub mod drivers {
use crate::bstr::BString;

/// The error produced when obtaining a list of [Drivers](gix_diff::blob::Driver).
///
/// It identifies the offending value as `diff.<name>.<attribute>` and carries the underlying
/// parse failure as `source`.
#[derive(Debug, thiserror::Error)]
#[error("Failed to parse value of 'diff.{name}.{attribute}'")]
pub struct Error {
/// The name of the driver.
pub name: BString,
/// The name of the attribute we tried to parse.
pub attribute: &'static str,
/// The actual error that occurred.
pub source: Box<dyn std::error::Error + Send + Sync + 'static>,
}
}
}

///
Expand Down
4 changes: 4 additions & 0 deletions gix/src/config/tree/sections/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ impl Core {
pub const ABBREV: Abbrev = Abbrev::new_with_validate("abbrev", &config::Tree::CORE, validate::Abbrev);
/// The `core.bare` key.
pub const BARE: keys::Boolean = keys::Boolean::new_boolean("bare", &config::Tree::CORE);
/// The `core.bigFileThreshold` key, declared as an unsigned integer.
pub const BIG_FILE_THRESHOLD: keys::UnsignedInteger =
keys::UnsignedInteger::new_unsigned_integer("bigFileThreshold", &config::Tree::CORE);
/// The `core.checkStat` key.
pub const CHECK_STAT: CheckStat =
CheckStat::new_with_validate("checkStat", &config::Tree::CORE, validate::CheckStat);
Expand Down Expand Up @@ -95,6 +98,7 @@ impl Section for Core {
&[
&Self::ABBREV,
&Self::BARE,
&Self::BIG_FILE_THRESHOLD,
&Self::CHECK_STAT,
&Self::DELTA_BASE_CACHE_LIMIT,
&Self::DISAMBIGUATE,
Expand Down
68 changes: 67 additions & 1 deletion gix/src/config/tree/sections/diff.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::config::tree::SubSectionRequirement;
use crate::{
config,
config::tree::{keys, Diff, Key, Section},
Expand All @@ -17,6 +18,20 @@ impl Diff {
);
/// The `diff.renames` key.
pub const RENAMES: Renames = Renames::new_renames("renames", &config::Tree::DIFF);

/// The `diff.<driver>.command` key.
///
/// A string value that requires a subsection naming the driver.
pub const DRIVER_COMMAND: keys::String = keys::String::new_string("command", &config::Tree::DIFF)
.with_subsection_requirement(Some(SubSectionRequirement::Parameter("driver")));
/// The `diff.<driver>.textconv` key.
///
/// A string value that requires a subsection naming the driver.
pub const DRIVER_TEXTCONV: keys::String = keys::String::new_string("textconv", &config::Tree::DIFF)
.with_subsection_requirement(Some(SubSectionRequirement::Parameter("driver")));
/// The `diff.<driver>.algorithm` key.
///
/// Validated with the same validator type as the other algorithm key, and requires a subsection naming the driver.
pub const DRIVER_ALGORITHM: Algorithm =
Algorithm::new_with_validate("algorithm", &config::Tree::DIFF, validate::Algorithm)
.with_subsection_requirement(Some(SubSectionRequirement::Parameter("driver")));
/// The `diff.<driver>.binary` key.
///
/// Validated by `validate::Binary`, and requires a subsection naming the driver.
pub const DRIVER_BINARY: Binary = Binary::new_with_validate("binary", &config::Tree::DIFF, validate::Binary)
.with_subsection_requirement(Some(SubSectionRequirement::Parameter("driver")));
}

impl Section for Diff {
Expand All @@ -25,7 +40,15 @@ impl Section for Diff {
}

/// All keys known to the `diff` section, including the per-driver `diff.<driver>.*` keys.
fn keys(&self) -> &[&dyn Key] {
    // The former three-element array was left in front of this one, which made the body
    // parse as `&[..] & [..]`; only the complete list must remain as the tail expression.
    &[
        &Self::ALGORITHM,
        &Self::RENAME_LIMIT,
        &Self::RENAMES,
        &Self::DRIVER_COMMAND,
        &Self::DRIVER_TEXTCONV,
        &Self::DRIVER_ALGORITHM,
        &Self::DRIVER_BINARY,
    ]
}
}

Expand All @@ -35,6 +58,9 @@ pub type Algorithm = keys::Any<validate::Algorithm>;
/// The `diff.renames` key.
pub type Renames = keys::Any<validate::Renames>;

/// The `diff.<driver>.binary` key, whose values are checked by `validate::Binary`.
pub type Binary = keys::Any<validate::Binary>;

mod algorithm {
use std::borrow::Cow;

Expand Down Expand Up @@ -67,6 +93,38 @@ mod algorithm {
}
}

mod binary {
    use crate::config::key::GenericErrorWithValue;
    use crate::config::tree::diff::Binary;

    impl Binary {
        /// Interpret `value` as a tri-state: `auto` maps to `None`, anything else must be a boolean.
        ///
        /// A missing `value` counts as an implicit `true`, which makes this method a direct fit for
        /// the output of [`gix_config::file::section::Body::value_implicit()`].
        ///
        /// # Errors
        ///
        /// Returns an error carrying the offending value if it is neither `auto` nor a boolean.
        pub fn try_into_binary(
            &'static self,
            value: Option<std::borrow::Cow<'_, crate::bstr::BStr>>,
        ) -> Result<Option<bool>, GenericErrorWithValue> {
            // A key without any value is an implicit `true`.
            let Some(value) = value else {
                return Ok(Some(true));
            };
            if value.as_ref() == "auto" {
                return Ok(None);
            }

            let parsed = gix_config::Boolean::try_from(value.as_ref());
            match parsed {
                Ok(boolean) => Ok(Some(boolean.0)),
                Err(err) => Err(GenericErrorWithValue::from_value(self, value.into_owned()).with_source(err)),
            }
        }
    }
}

mod renames {
use crate::{
bstr::ByteSlice,
Expand Down Expand Up @@ -125,4 +183,12 @@ mod validate {
Ok(())
}
}

/// Validates values of the `diff.<driver>.binary` key by attempting the actual conversion.
pub struct Binary;
impl keys::Validate for Binary {
    fn validate(&self, value: &BStr) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
        // Only success or failure matters here, the converted tri-state is discarded.
        match Diff::DRIVER_BINARY.try_into_binary(Some(value.into())) {
            Ok(_tri_state) => Ok(()),
            Err(err) => Err(err.into()),
        }
    }
}
}
64 changes: 63 additions & 1 deletion gix/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod utils {
use crate::config::cache::util::ApplyLeniency;
use crate::config::tree::Diff;
use crate::diff::rename::Tracking;
use crate::Repository;
use gix_diff::rewrites::Copies;
use gix_diff::Rewrites;

Expand All @@ -38,6 +39,27 @@ mod utils {
}
}

///
pub mod resource_cache {
/// The error returned by [`resource_cache()`](super::resource_cache()).
///
/// Each variant transparently wraps the error of one of the configuration lookups performed
/// while assembling the cache, so the displayed message is the one of the wrapped error.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
DiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
#[error(transparent)]
WorktreeFilterOptions(#[from] crate::filter::pipeline::options::Error),
#[error(transparent)]
DiffDrivers(#[from] crate::config::diff::drivers::Error),
#[error(transparent)]
DiffPipelineOptions(#[from] crate::config::diff::pipeline_options::Error),
#[error(transparent)]
CommandContext(#[from] crate::config::command_context::Error),
#[error(transparent)]
AttributeStack(#[from] crate::config::attribute_stack::Error),
}
}

/// Create an instance by reading all relevant information from the `config`uration, while being `lenient` or not.
/// Returns `Ok(None)` if nothing is configured.
///
Expand Down Expand Up @@ -75,6 +97,46 @@ mod utils {
}
.into())
}

/// Return a low-level utility to efficiently prepare the blob-level diff operation between two resources,
/// and cache these diffable versions so that matrix-like MxN diffs are efficient.
///
/// `repo` is used to obtain the needed configuration values, and `index` is used to potentially read
/// `.gitattributes` files from, which may affect the diff operation.
/// `mode` determines what the diffable files will look like, and also how fast, on average, these conversions are.
/// `roots` provide information about where to get diffable data from, so source and destination can either be
/// sourced from a worktree, or from the object database, or both.
pub fn resource_cache(
    repo: &Repository,
    index: &gix_index::State,
    mode: gix_diff::blob::pipeline::Mode,
    roots: gix_diff::blob::pipeline::WorktreeRoots,
) -> Result<gix_diff::blob::Platform, resource_cache::Error> {
    let platform_options = gix_diff::blob::platform::Options {
        algorithm: Some(repo.config.diff_algorithm()?),
        skip_internal_diff_if_external_is_configured: false,
    };

    let worktree_filter =
        gix_filter::Pipeline::new(repo.command_context()?, crate::filter::Pipeline::options(repo)?);
    let pipeline = gix_diff::blob::Pipeline::new(
        roots,
        worktree_filter,
        repo.config.diff_drivers()?,
        repo.config.diff_pipeline_options()?,
    );

    let attributes = repo
        .attributes_only(
            // TODO(perf): this could benefit from not having to build an intermediate index,
            //             and traverse a tree directly.
            index,
            // This is an optimization, as we avoid reading files from the working tree, which also
            // might not match the index at all depending on what the user passed.
            gix_worktree::stack::state::attributes::Source::IdMapping,
        )?
        .inner;

    Ok(gix_diff::blob::Platform::new(
        platform_options,
        pipeline,
        mode,
        attributes,
    ))
}
}
#[cfg(feature = "blob-diff")]
pub use utils::new_rewrites;
pub use utils::{new_rewrites, resource_cache};
Loading

0 comments on commit 4aea9b0

Please sign in to comment.