Skip to content

Commit

Permalink
Enable shallow clones and fetches for registry and git dependencies.
Browse files Browse the repository at this point in the history
The implementation hinges on passing information about the kind of clone
and fetch to the `fetch()` method, which then configures the fetch accordingly.

Note that it doesn't differentiate between initial clones and fetches as
the shallow-ness of the repository is maintained nonetheless.
  • Loading branch information
Byron committed Mar 14, 2023
1 parent 41412a1 commit e81797d
Show file tree
Hide file tree
Showing 6 changed files with 547 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.16.0"
git2-curl = "0.17.0"
gix = { version = "0.41.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix = { version = "0.42.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix-features-for-configuration-only = { version = "0.28.0", package = "gix-features", features = [ "parallel" ] }
glob = "0.3.0"
hex = "0.4"
Expand Down
9 changes: 9 additions & 0 deletions src/cargo/sources/git/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,14 @@ mod source;
mod utils;

pub mod fetch {
/// The kind of remote repository to fetch.
///
/// Callers pass this to `fetch()` so it can tell a git dependency apart from a registry index.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum RemoteKind {
    /// The repository belongs to a git dependency.
    GitDependency,
    /// The repository belongs to a Cargo registry.
    Registry,
}

pub type Error = gix::env::collate::fetch::Error<gix::refspec::parse::Error>;
}
55 changes: 43 additions & 12 deletions src/cargo/sources/git/oxide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ pub fn with_retry_and_progress(
) -> CargoResult<()> {
std::thread::scope(|s| {
let mut progress_bar = Progress::new("Fetch", config);
let is_shallow = config
.cli_unstable()
.gitoxide
.map_or(false, |gix| gix.shallow_deps || gix.shallow_index);
network::with_retry(config, || {
let progress_root: Arc<gix::progress::tree::Root> =
gix::progress::tree::root::Options {
Expand All @@ -50,7 +54,7 @@ pub fn with_retry_and_progress(
);
amend_authentication_hints(res, urls.get_mut().take())
});
translate_progress_to_bar(&mut progress_bar, root)?;
translate_progress_to_bar(&mut progress_bar, root, is_shallow)?;
thread.join().expect("no panic in scoped thread")
})
})
Expand All @@ -59,7 +63,9 @@ pub fn with_retry_and_progress(
fn translate_progress_to_bar(
progress_bar: &mut Progress<'_>,
root: Weak<gix::progress::tree::Root>,
is_shallow: bool,
) -> CargoResult<()> {
let remote_progress: gix::progress::Id = gix::remote::fetch::ProgressId::RemoteProgress.into();
let read_pack_bytes: gix::progress::Id =
gix::odb::pack::bundle::write::ProgressId::ReadPackBytes.into();
let delta_index_objects: gix::progress::Id =
Expand Down Expand Up @@ -88,6 +94,7 @@ fn translate_progress_to_bar(
"progress should be smoother by keeping these as multiples of each other"
);

let num_phases = if is_shallow { 3 } else { 2 }; // indexing + delta-resolution (same amount of objects each), preceded by a remote-side phase when shallow
while let Some(root) = root.upgrade() {
std::thread::sleep(sleep_interval);
let needs_update = last_fast_update.elapsed() >= fast_check_interval;
Expand All @@ -102,31 +109,37 @@ fn translate_progress_to_bar(
fn progress_by_id(
id: gix::progress::Id,
task: &gix::progress::Task,
) -> Option<&gix::progress::Value> {
(task.id == id).then(|| task.progress.as_ref()).flatten()
) -> Option<(&str, &gix::progress::Value)> {
(task.id == id)
.then(|| task.progress.as_ref())
.flatten()
.map(|value| (task.name.as_str(), value))
}
fn find_in<K>(
tasks: &[(K, gix::progress::Task)],
cb: impl Fn(&gix::progress::Task) -> Option<&gix::progress::Value>,
) -> Option<&gix::progress::Value> {
cb: impl Fn(&gix::progress::Task) -> Option<(&str, &gix::progress::Value)>,
) -> Option<(&str, &gix::progress::Value)> {
tasks.iter().find_map(|(_, t)| cb(t))
}

const NUM_PHASES: usize = 2; // indexing + delta-resolution, both with same amount of objects to handle
if let Some(objs) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Resolving deltas.
if let Some((_, objs)) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Phase 3: Resolving deltas.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let msg = format!(", ({objects}/{total_objects}) resolving deltas");

progress_bar.tick(total_objects + objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 1)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((objs, read_pack)) =
find_in(&tasks, |t| progress_by_id(read_pack_bytes, t)).and_then(|read| {
find_in(&tasks, |t| progress_by_id(delta_index_objects, t))
.map(|delta| (delta, read))
.map(|delta| (delta.1, read.1))
})
{
// Receiving objects.
// Phase 2: Receiving objects.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let received_bytes = read_pack.step.load(Ordering::Relaxed);
Expand All @@ -139,7 +152,25 @@ fn translate_progress_to_bar(
let (rate, unit) = human_readable_bytes(counter.rate() as u64);
let msg = format!(", {rate:.2}{unit}/s");

progress_bar.tick(objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 2)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((action, remote)) =
find_in(&tasks, |t| progress_by_id(remote_progress, t))
{
if !is_shallow {
continue;
}
// phase 1: work on the remote side

// Show the remote's own step counter, but only once its total is known.
let objects = remote.step.load(Ordering::Relaxed);
if let Some(total_objects) = remote.done_at {
let msg = format!(", ({objects}/{total_objects}) {action}");
progress_bar.tick(objects, total_objects * num_phases, &msg)?;
}
}
}
Ok(())
Expand Down
79 changes: 74 additions & 5 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! Utilities for handling git repositories, mainly around
//! authentication/cloning.
use crate::core::features::GitoxideFeatures;
use crate::core::{GitReference, Verbosity};
use crate::sources::git::fetch::RemoteKind;
use crate::sources::git::oxide;
use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
use crate::util::errors::CargoResult;
Expand Down Expand Up @@ -96,9 +98,16 @@ impl GitRemote {
// if we can. If that can successfully load our revision then we've
// populated the database with the latest version of `reference`, so
// return that database and the rev we resolve to.
let remote_kind = RemoteKind::GitDependency;
if let Some(mut db) = db {
fetch(&mut db.repo, self.url.as_str(), reference, cargo_config)
.context(format!("failed to fetch into: {}", into.display()))?;
fetch(
&mut db.repo,
self.url.as_str(),
reference,
cargo_config,
remote_kind,
)
.context(format!("failed to fetch into: {}", into.display()))?;
match locked_rev {
Some(rev) => {
if db.contains(rev) {
Expand All @@ -121,8 +130,14 @@ impl GitRemote {
}
paths::create_dir_all(into)?;
let mut repo = init(into, true)?;
fetch(&mut repo, self.url.as_str(), reference, cargo_config)
.context(format!("failed to clone into: {}", into.display()))?;
fetch(
&mut repo,
self.url.as_str(),
reference,
cargo_config,
remote_kind,
)
.context(format!("failed to clone into: {}", into.display()))?;
let rev = match locked_rev {
Some(rev) => rev,
None => reference.resolve(&repo)?,
Expand Down Expand Up @@ -282,6 +297,12 @@ impl<'a> GitCheckout<'a> {
.with_checkout(checkout)
.fetch_options(fopts)
.clone(url.as_str(), into)?;
if database.repo.is_shallow() {
std::fs::copy(
database.repo.path().join("shallow"),
r.path().join("shallow"),
)?;
}
repo = Some(r);
Ok(())
})?;
Expand Down Expand Up @@ -432,7 +453,14 @@ impl<'a> GitCheckout<'a> {
cargo_config
.shell()
.status("Updating", format!("git submodule `{}`", url))?;
fetch(&mut repo, &url, &reference, cargo_config).with_context(|| {
fetch(
&mut repo,
&url,
&reference,
cargo_config,
RemoteKind::GitDependency,
)
.with_context(|| {
format!(
"failed to fetch submodule `{}` from {}",
child.name().unwrap_or(""),
Expand Down Expand Up @@ -803,11 +831,14 @@ pub fn with_fetch_options(
})
}

/// Note that `kind` is only needed to know how to interpret `gitoxide` feature options to potentially shallow-clone
/// the repository.
pub fn fetch(
repo: &mut git2::Repository,
orig_url: &str,
reference: &GitReference,
config: &Config,
kind: RemoteKind,
) -> CargoResult<()> {
if config.frozen() {
anyhow::bail!(
Expand Down Expand Up @@ -893,6 +924,25 @@ pub fn fetch(
let git2_repo = repo;
let config_overrides = cargo_config_to_gitoxide_overrides(config)?;
let repo_reinitialized = AtomicBool::default();
let has_feature = |cb: &dyn Fn(GitoxideFeatures) -> bool| {
config
.cli_unstable()
.gitoxide
.map_or(false, |features| cb(features))
};
let shallow = if git2_repo.is_shallow() {
gix::remote::fetch::Shallow::NoChange
} else {
match kind {
RemoteKind::GitDependency if has_feature(&|git| git.shallow_deps) => {
gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
}
RemoteKind::Registry if has_feature(&|git| git.shallow_index) => {
gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
}
_ => gix::remote::fetch::Shallow::NoChange,
}
};
let res = oxide::with_retry_and_progress(
&git2_repo.path().to_owned(),
config,
Expand Down Expand Up @@ -952,6 +1002,7 @@ pub fn fetch(
);
let outcome = connection
.prepare_fetch(gix::remote::ref_map::Options::default())?
.with_shallow(shallow.clone())
.receive(should_interrupt)?;
Ok(outcome)
});
Expand All @@ -967,6 +1018,7 @@ pub fn fetch(
// folder before writing files into it, or else not even open a directory as git repository (which is
// also handled here).
&& err.is_corrupted()
|| has_shallow_lock_file(&err)
{
repo_reinitialized.store(true, Ordering::Relaxed);
debug!(
Expand Down Expand Up @@ -1005,6 +1057,12 @@ pub fn fetch(
// again. If it looks like any other kind of error, or if we've already
// blown away the repository, then we want to return the error as-is.
let mut repo_reinitialized = false;
// while shallow repos aren't officially supported, don't risk fetching them.
// We are in this situation only when `gitoxide` is cloning but then disabled to use `git2`
// for fetching.
if repo.is_shallow() {
reinitialize(repo)?;
}
loop {
debug!("initiating fetch of {:?} from {}", refspecs, orig_url);
let res = repo
Expand Down Expand Up @@ -1036,6 +1094,17 @@ pub fn fetch(
}
}

/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
/// files atomically.
/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
matches!(
err,
gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::LockShallowFile(_))
)
}

fn fetch_with_cli(
repo: &mut git2::Repository,
url: &str,
Expand Down
11 changes: 9 additions & 2 deletions src/cargo/sources/registry/remote.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::core::{GitReference, PackageId, SourceId};
use crate::sources::git;
use crate::sources::git::fetch::RemoteKind;
use crate::sources::registry::download;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
Expand Down Expand Up @@ -300,8 +301,14 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
// checkout.
let url = self.source_id.url();
let repo = self.repo.borrow_mut().unwrap();
git::fetch(repo, url.as_str(), &self.index_git_ref, self.config)
.with_context(|| format!("failed to fetch `{}`", url))?;
git::fetch(
repo,
url.as_str(),
&self.index_git_ref,
self.config,
RemoteKind::Registry,
)
.with_context(|| format!("failed to fetch `{}`", url))?;

// Create a dummy file to record the mtime for when we updated the
// index.
Expand Down
Loading

0 comments on commit e81797d

Please sign in to comment.