Skip to content

Commit

Permalink
Merge branch 'rename-tracking'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Feb 11, 2023
2 parents 8bce6d5 + 81f0d47 commit 9e7d792
Show file tree
Hide file tree
Showing 19 changed files with 544 additions and 186 deletions.
10 changes: 10 additions & 0 deletions git-repository/src/config/cache/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ impl Cache {
.get_or_try_init(|| remote::url::SchemePermission::from_config(&self.resolved, self.filter_config_section))
}

/// Return the rename-tracking configuration for diffs, computing it on first use.
///
/// The value is produced once by `Renames::try_from_config()` from the resolved configuration,
/// honoring `lenient_config`, and is cached in `self.diff_renames` afterwards.
/// An error from that call is returned to the caller as is.
pub(crate) fn diff_renames(
    &self,
) -> Result<Option<crate::object::tree::diff::Renames>, crate::object::tree::diff::renames::Error> {
    use crate::object::tree::diff::Renames;

    let cached = self
        .diff_renames
        .get_or_try_init(|| Renames::try_from_config(&self.resolved, self.lenient_config))?;
    // `Option<Renames>` is `Copy`, so hand out a copy of the cached value.
    Ok(*cached)
}

/// Returns (file-timeout, pack-refs timeout)
pub(crate) fn lock_timeout(
&self,
Expand Down
2 changes: 2 additions & 0 deletions git-repository/src/config/cache/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ impl Cache {
user_agent: Default::default(),
personas: Default::default(),
url_rewrite: Default::default(),
diff_renames: Default::default(),
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
url_scheme: Default::default(),
diff_algorithm: Default::default(),
Expand Down Expand Up @@ -226,6 +227,7 @@ impl Cache {
self.user_agent = Default::default();
self.personas = Default::default();
self.url_rewrite = Default::default();
self.diff_renames = Default::default();
self.diff_algorithm = Default::default();
(self.pack_cache_bytes, self.object_cache_bytes) =
util::parse_object_caches(config, self.lenient_config, self.filter_config_section)?;
Expand Down
2 changes: 2 additions & 0 deletions git-repository/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ pub(crate) struct Cache {
pub(crate) personas: OnceCell<identity::Personas>,
/// A lazily loaded rewrite list for remote urls
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
/// The lazy-loaded rename information for diffs.
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Renames>>,
/// A lazily loaded mapping to know which url schemes to allow
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,
Expand Down
62 changes: 61 additions & 1 deletion git-repository/src/config/tree/sections/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ impl Diff {
/// The `diff.algorithm` key.
///
/// Values are checked by `validate::Algorithm`; the attached deviation note describes how `patience`
/// and an unset value are treated.
pub const ALGORITHM: Algorithm = Algorithm::new_with_validate("algorithm", &config::Tree::DIFF, validate::Algorithm)
.with_deviation("'patience' diff is not implemented and can default to 'histogram' if lenient config is used, and defaults to histogram if unset for fastest and best results");
/// The `diff.renameLimit` key, holding an unsigned integer.
///
/// The attached note explains that the configured value is effectively squared.
pub const RENAME_LIMIT: keys::UnsignedInteger = keys::UnsignedInteger::new_unsigned_integer(
"renameLimit",
&config::Tree::DIFF,
)
.with_note(
"The limit is actually squared, so 1000 stands for up to 1 million diffs if fuzzy rename tracking is enabled",
);
/// The `diff.renames` key.
///
/// Besides boolean values, `copy` and `copies` are valid and additionally enable copy tracking,
/// see `Renames::try_into_renames()`.
pub const RENAMES: Renames = Renames::new_renames("renames", &config::Tree::DIFF);
}

impl Section for Diff {
Expand All @@ -15,13 +25,16 @@ impl Section for Diff {
}

fn keys(&self) -> &[&dyn Key] {
&[&Self::ALGORITHM]
&[&Self::ALGORITHM, &Self::RENAME_LIMIT, &Self::RENAMES]
}
}

/// The `diff.algorithm` key.
pub type Algorithm = keys::Any<validate::Algorithm>;

/// The `diff.renames` key.
pub type Renames = keys::Any<validate::Renames>;

mod algorithm {
use std::borrow::Cow;

Expand Down Expand Up @@ -54,11 +67,49 @@ mod algorithm {
}
}

mod renames {
    use std::borrow::Cow;

    use crate::config::tree::{keys, Section};
    use crate::diff::rename::Tracking;
    use crate::{
        bstr::BStr,
        config::{key::GenericError, tree::sections::diff::Renames},
    };

    impl Renames {
        /// Create a new instance for the key `name` in `section`, validated by `validate::Renames`.
        pub const fn new_renames(name: &'static str, section: &'static dyn Section) -> Self {
            keys::Any::new_with_validate(name, section, super::validate::Renames)
        }

        /// Try to convert the configuration into a valid rename tracking variant.
        ///
        /// `value` is the result of interpreting the configured value as boolean: `true` enables rename
        /// tracking and `false` disables it. If `value` is an error, `value_string` is called to obtain the
        /// raw value, and `copy` or `copies` enable tracking of renames and copies. Both words are compared
        /// without regard to ASCII case, just like `git` does.
        ///
        /// # Errors
        ///
        /// An error is returned if `value` is an error and `value_string` yields `None`, or if the raw value
        /// is neither `copy` nor `copies`. In the latter case the boolean parse error is attached as source.
        pub fn try_into_renames<'a>(
            &'static self,
            value: Result<bool, git_config::value::Error>,
            value_string: impl FnOnce() -> Option<Cow<'a, BStr>>,
        ) -> Result<Tracking, GenericError> {
            let not_a_boolean = match value {
                Ok(true) => return Ok(Tracking::Renames),
                Ok(false) => return Ok(Tracking::Disabled),
                Err(err) => err,
            };

            // Only look at the raw value once we know it isn't a boolean.
            let raw = value_string().ok_or_else(|| GenericError::from(self))?;
            let bytes: &[u8] = raw.as_ref().as_ref();
            // `git` compares these keywords with `strcasecmp()`, so `Copies` must be accepted as well.
            if bytes.eq_ignore_ascii_case(b"copy") || bytes.eq_ignore_ascii_case(b"copies") {
                Ok(Tracking::RenamesAndCopies)
            } else {
                Err(GenericError::from_value(self, raw.into_owned()).with_source(not_a_boolean))
            }
        }
    }
}

mod validate {
use crate::{
bstr::BStr,
config::tree::{keys, Diff},
};
use std::borrow::Cow;

pub struct Algorithm;
impl keys::Validate for Algorithm {
Expand All @@ -67,4 +118,13 @@ mod validate {
Ok(())
}
}

/// Validates values of the `diff.renames` key by attempting the conversion into a rename tracking variant.
pub struct Renames;
impl keys::Validate for Renames {
    fn validate(&self, value: &BStr) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
        // First try the boolean interpretation, the raw value serves as fallback for the keywords.
        let as_bool = git_config::Boolean::try_from(value).map(|parsed| parsed.0);
        match Diff::RENAMES.try_into_renames(as_bool, || Some(Cow::Borrowed(value))) {
            Ok(_tracking) => Ok(()),
            Err(err) => Err(err.into()),
        }
    }
}
}
21 changes: 20 additions & 1 deletion git-repository/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ pub use git_actor as actor;
pub use git_attributes as attrs;
pub use git_credentials as credentials;
pub use git_date as date;
pub use git_diff as diff;
pub use git_features as features;
use git_features::threading::OwnShared;
pub use git_features::{parallel, progress::Progress, threading};
Expand Down Expand Up @@ -139,6 +138,26 @@ pub mod progress {
pub use prodash::tree;
}

/// Diff-related types: everything exported by `git_diff`, along with types for rename tracking.
pub mod diff {
pub use git_diff::*;
/// Types related to rename tracking.
pub mod rename {
/// Determine how to do rename tracking.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Tracking {
/// Do not track renames at all, the fastest option.
Disabled,
/// Track renames.
Renames,
/// Track renames and copies.
///
/// This is the most expensive option.
RenamesAndCopies,
}
}
}

/// See [ThreadSafeRepository::discover()], but returns a [`Repository`] instead.
#[allow(clippy::result_large_err)]
pub fn discover(directory: impl AsRef<std::path::Path>) -> Result<Repository, discover::Error> {
Expand Down
2 changes: 1 addition & 1 deletion git-repository/src/object/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub mod diff {
FnH: FnMut(line::Change<'_, '_>) -> Result<(), E>,
E: std::error::Error,
{
let input = git_diff::blob::intern::InternedInput::new(self.old.data.as_bytes(), self.new.data.as_bytes());
let input = self.line_tokens();
let mut err = None;
let mut lines = Vec::new();
git_diff::blob::diff(self.algo, &input, |before: Range<u32>, after: Range<u32>| {
Expand Down
66 changes: 64 additions & 2 deletions git-repository/src/object/tree/diff/change.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::bstr::BStr;
use git_object::tree::EntryMode;

use crate::Id;

/// An event emitted when finding differences between two trees.
#[derive(Debug, Clone, Copy)]
pub enum Event<'old, 'new> {
pub enum Event<'a, 'old, 'new> {
/// An entry was added, like the addition of a file or directory.
Addition {
/// The mode of the added entry.
Expand Down Expand Up @@ -32,9 +33,59 @@ pub enum Event<'old, 'new> {
/// The object id after the modification.
id: Id<'new>,
},
/// Entries are considered renamed if they are not trees and they, according to some understanding of identity, appeared
/// as [`Deletion`][Event::Deletion] in case of the previous source of the rename as well as [`Addition`][Event::Addition]
/// acting as destination all the while [rename tracking][super::Platform::track_renames()] is enabled.
///
/// Note that mode changes may have occurred as well, i.e. changes from executable to non-executable or vice-versa.
Rename {
/// The location of the source of the rename operation.
///
/// It may be empty if neither [file names][super::Platform::track_filename()] nor [file paths][super::Platform::track_path()]
/// are tracked.
source_location: &'a BStr,
/// The mode of the entry before the rename.
source_entry_mode: git_object::tree::EntryMode,
/// The object id of the entry before the rename.
///
/// Note that this is the same as `id` if we require the [similarity to be 100%][super::Renames::percentage], but may
/// be different otherwise.
source_id: Id<'old>,

/// The mode of the entry after the rename.
/// It could differ but still be considered a rename as we are concerned only about content.
entry_mode: git_object::tree::EntryMode,
/// The object id after the rename.
id: Id<'new>,
},
/// This entry is considered to be a copy of another, according to some understanding of identity, as its source still exists.
/// If the source wouldn't exist, it would be considered a [rename][Event::Rename].
///
/// This variant may only occur if [rename tracking][super::Platform::track_renames()] is enabled, otherwise copies appear to be
/// plain [additions][Event::Addition].
Copy {
/// The location of the source of the copy operation.
///
/// It may be empty if neither [file names][super::Platform::track_filename()] nor [file paths][super::Platform::track_path()]
/// are tracked.
source_location: &'a BStr,
/// The mode of the entry that is considered the source.
source_entry_mode: git_object::tree::EntryMode,
/// The object id of the source of the copy.
///
/// Note that this is the same as `id` if we require the [similarity to be 100%][super::Renames::percentage], but may
/// be different otherwise.
source_id: Id<'old>,

/// The mode of the entry after the copy, or the destination of it.
/// It could differ but still be considered a copy as we are concerned only about content.
entry_mode: git_object::tree::EntryMode,
/// The object id after the copy, or the destination of it.
id: Id<'new>,
},
}

impl<'old, 'new> Event<'old, 'new> {
impl<'a, 'old, 'new> Event<'a, 'old, 'new> {
/// Produce a platform for performing a line-diff, or `None` if this is not a [`Modification`][Event::Modification]
/// or one of the entries to compare is not a blob.
pub fn diff(
Expand All @@ -50,4 +101,15 @@ impl<'old, 'new> Event<'old, 'new> {
_ => None,
}
}

/// Return the current mode of this instance.
pub fn entry_mode(&self) -> git_object::tree::EntryMode {
match self {
Event::Addition { entry_mode, .. }
| Event::Deletion { entry_mode, .. }
| Event::Modification { entry_mode, .. }
| Event::Rename { entry_mode, .. } => *entry_mode,
Event::Copy { entry_mode, .. } => *entry_mode,
}
}
}
3 changes: 3 additions & 0 deletions git-repository/src/object/tree/diff/for_each.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use git_object::TreeRefIter;
use git_odb::FindExt;

use super::{change, Action, Change, Platform, Tracking};
use crate::object::tree::diff::Renames;
use crate::{
bstr::{BStr, BString, ByteSlice, ByteVec},
ext::ObjectIdExt,
Expand Down Expand Up @@ -39,6 +40,7 @@ impl<'a, 'old> Platform<'a, 'old> {
repo: self.lhs.repo,
other_repo: other.repo,
tracking: self.tracking,
_renames: self.renames,
location: BString::default(),
path_deque: Default::default(),
visit: for_each,
Expand Down Expand Up @@ -67,6 +69,7 @@ struct Delegate<'old, 'new, VisitFn, E> {
repo: &'old Repository,
other_repo: &'new Repository,
tracking: Option<Tracking>,
_renames: Option<Renames>,
location: BString,
path_deque: VecDeque<BString>,
visit: VisitFn,
Expand Down
43 changes: 39 additions & 4 deletions git-repository/src/object/tree/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub struct Change<'a, 'old, 'new> {
/// Otherwise this value is always an empty path.
pub location: &'a BStr,
/// The diff event itself to provide information about what would need to change.
pub event: change::Event<'old, 'new>,
pub event: change::Event<'a, 'old, 'new>,
}

///
Expand All @@ -36,12 +36,15 @@ impl<'repo> Tree<'repo> {
/// # Performance
///
/// It's highly recommended to set an object cache to avoid extracting the same object multiple times.
pub fn changes<'a>(&'a self) -> Platform<'a, 'repo> {
Platform {
/// By default, similar to `git diff`, rename tracking will be enabled if it is not configured.
#[allow(clippy::result_large_err)]
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, renames::Error> {
Ok(Platform {
state: Default::default(),
lhs: self,
tracking: None,
}
renames: self.repo.config.diff_renames()?.unwrap_or_default().into(),
})
}
}

Expand All @@ -51,6 +54,7 @@ pub struct Platform<'a, 'repo> {
state: git_diff::tree::State,
lhs: &'a Tree<'repo>,
tracking: Option<Tracking>,
renames: Option<Renames>,
}

#[derive(Clone, Copy)]
Expand All @@ -59,6 +63,27 @@ enum Tracking {
Path,
}

/// A structure to capture how to perform rename tracking.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Renames {
/// If `Some(…)`, also find copies. `None` is the default, which does not try to detect copies at all.
///
/// Note that detecting copies is an even more expensive operation than detecting renames.
pub copies: Option<renames::Copies>,
/// The percentage of similarity needed for files to be considered renamed or copied, defaulting to `Some(0.5)`.
/// This field is similar to `git diff -M50%`.
///
/// If `None`, files are only considered equal if their content matches 100%.
/// Note that values greater than 1.0 have no different effect than 1.0.
pub percentage: Option<f32>,
/// The number of files to consider for rename or copy tracking. Defaults to 1000.
/// If 0, there is no limit.
pub limit: usize,
}

///
pub mod renames;

/// Configuration
impl<'a, 'repo> Platform<'a, 'repo> {
/// Keep track of file-names, which makes the [`location`][Change::location] field usable with the filename of the changed item.
Expand All @@ -74,6 +99,16 @@ impl<'a, 'repo> Platform<'a, 'repo> {
self.tracking = Some(Tracking::Path);
self
}

/// Provide `None` to disable rename tracking entirely, or pass `Some(<configuration>)` to control to
/// what extent rename tracking is performed.
///
/// Note that by default, the configuration determines rename tracking and standard git defaults are used
/// if nothing is configured, which turns on rename tracking with `-M50%`.
///
/// Returns `self` so that further configuration calls can be chained.
pub fn track_renames(&mut self, renames: Option<Renames>) -> &mut Self {
self.renames = renames;
self
}
}

///
Expand Down
Loading

0 comments on commit 9e7d792

Please sign in to comment.