Skip to content

Commit

Permalink
feat: add simple CLI for gix archive
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jul 22, 2023
1 parent c4a1fb1 commit 32bbb8b
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,17 @@ prodash-render-line = ["prodash/render-line", "prodash-render-line-crossterm", "
cache-efficiency-debug = ["gix-features/cache-efficiency-debug"]

## A way to enable most `gitoxide-core` tools found in `ein tools`, namely `organize` and `estimate hours`.
gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours"]
gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours", "gitoxide-core-tools-archive"]

## A program to perform analytics on a `git` repository, using an auto-maintained sqlite database
gitoxide-core-tools-query = ["gitoxide-core/query"]

## A program to run algorithms on a corpus of repositories, recording each run for later comparison.
gitoxide-core-tools-corpus = ["gitoxide-core/corpus"]

## A sub-command to generate archives from virtual worktree checkouts.
gitoxide-core-tools-archive = ["gitoxide-core/archive"]

#! ### Building Blocks for mutually exclusive networking
#! Blocking and async features are mutually exclusive and cause a compile-time error. This also means that `cargo … --all-features` will fail.
#! Within each section, features can be combined.
Expand Down
4 changes: 4 additions & 0 deletions gitoxide-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ query = ["dep:rusqlite"]
## *Note that* `organize` we need for finding git repositories fast.
corpus = [ "dep:rusqlite", "dep:sysinfo", "organize", "dep:crossbeam-channel", "dep:serde_json", "dep:tracing-forest", "dep:tracing-subscriber", "dep:tracing", "dep:parking_lot" ]

## The ability to create archives from virtual worktrees, similar to `git archive`.
archive = ["dep:gix-archive-for-configuration-only", "gix/worktree-archive"]

#! ### Mutually Exclusive Networking
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.

Expand All @@ -44,6 +47,7 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"]
gix = { version = "^0.49.1", path = "../gix", default-features = false }
gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.40.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static"] }
gix-transport-configuration-only = { package = "gix-transport", version = "^0.34.0", path = "../gix-transport", default-features = false }
gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.2.0", path = "../gix-archive", optional = true, features = ["tar"] }
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
anyhow = "1.0.42"
thiserror = "1.0.34"
Expand Down
80 changes: 80 additions & 0 deletions gitoxide-core/src/repository/archive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use anyhow::bail;
use gix::worktree::archive;
use gix::Progress;
use std::ops::Add;
use std::path::Path;

/// Write an archive of the tree that `rev_spec` resolves to.
///
/// * `repo` - the repository in which `rev_spec` is resolved and from which the worktree stream is created.
/// * `destination_path` - the file to create and write the archive to. If `None`, all output is discarded,
///   which still drives the whole pipeline and updates `progress`.
/// * `rev_spec` - the revision to archive, defaulting to `HEAD` if `None`.
/// * `progress` - receives two children: `entries` counts archive entries, `written` counts bytes written.
/// * `format` - the archive format. If `None`, it is derived from the extension of `destination_path`.
///
/// If the revision resolves to a commit, its committer time is used as the archive's modification time,
/// otherwise the current time is used.
///
/// # Errors
///
/// Fails if the format cannot be derived, the revision cannot be resolved to a commit or tree,
/// the destination file cannot be created, or writing and flushing the archive fails.
pub fn stream(
    repo: gix::Repository,
    destination_path: Option<&Path>,
    rev_spec: Option<&str>,
    mut progress: impl Progress,
    format: Option<archive::Format>,
) -> anyhow::Result<()> {
    // Only needed to call `flush()` on the writer below.
    use std::io::Write as _;

    let format = format.map_or_else(|| format_from_ext(destination_path), Ok)?;
    let object = repo.rev_parse_single(rev_spec.unwrap_or("HEAD"))?.object()?;
    let (modification_date, tree) = fetch_rev_info(object)?;

    let start = std::time::Instant::now();
    let (stream, index) = repo.worktree_stream(tree)?;

    let mut entries = progress.add_child("entries");
    entries.init(Some(index.entries().len()), gix::progress::count("entries"));
    let mut bytes = progress.add_child("written");
    bytes.init(None, gix::progress::bytes());

    let mut file = gix::progress::Write {
        inner: match destination_path {
            Some(path) => Box::new(std::io::BufWriter::with_capacity(
                128 * 1024,
                std::fs::File::create(path)?,
            )) as Box<dyn std::io::Write>,
            None => Box::new(std::io::sink()),
        },
        progress: &mut bytes,
    };
    repo.worktree_archive(
        stream,
        &mut file,
        &mut entries,
        &gix::interrupt::IS_INTERRUPTED,
        gix::worktree::archive::Options {
            format,
            tree_prefix: None,
            modification_time: modification_date
                .map(|seconds| {
                    // NOTE(review): assumes the commit time may be negative (signed seconds) - confirm.
                    // A plain `as u64` cast would wrap such values into an enormous offset and make the
                    // addition below panic, so pre-epoch times are clamped to the epoch instead.
                    let seconds = u64::try_from(seconds).unwrap_or(0);
                    std::time::UNIX_EPOCH.add(std::time::Duration::from_secs(seconds))
                })
                .unwrap_or_else(std::time::SystemTime::now),
        },
    )?;
    // Flush explicitly: dropping a `BufWriter` ignores write errors, which would let a
    // truncated archive pass as success.
    file.flush()?;

    entries.show_throughput(start);
    bytes.show_throughput(start);

    Ok(())
}

/// Determine the tree to archive for `object`, along with the time to use as modification time.
///
/// Returns `(Some(committer_time_in_seconds), tree_id)` for commits and `(None, tree_id)` for trees.
/// Tags are followed to the object they ultimately point to, which is then handled like any other object.
///
/// # Errors
///
/// Fails if `object` is, or a tag points to, a blob, or if an object along the way cannot be
/// found or decoded.
fn fetch_rev_info(
    object: gix::Object<'_>,
) -> anyhow::Result<(Option<gix::date::SecondsSinceUnixEpoch>, gix::ObjectId)> {
    Ok(match object.kind {
        gix::object::Kind::Commit => {
            let commit = object.into_commit();
            (Some(commit.committer()?.time.seconds), commit.tree_id()?.detach())
        }
        gix::object::Kind::Tree => (None, object.id),
        // Follow the tag chain only instead of insisting on a commit, so that tags pointing
        // directly at a tree are accepted just like trees are.
        // NOTE(review): relies on `peel_tags_to_end()` never returning a tag, which bounds the recursion.
        gix::object::Kind::Tag => fetch_rev_info(object.peel_tags_to_end()?)?,
        gix::object::Kind::Blob => bail!("Cannot derive commit or tree from blob at {}", object.id),
    })
}

/// Derive the archive format from the extension of `path`.
///
/// * `tar` selects [`archive::Format::Tar`].
/// * `stream` selects [`archive::Format::InternalTransientNonPersistable`], which is also used
///   if `path` is `None` as there is nothing to persist in that case.
///
/// # Errors
///
/// Fails if `path` has no extension, if the extension is not valid UTF-8 (reported the same way
/// as a missing extension), or if the extension is not one of the above.
fn format_from_ext(path: Option<&Path>) -> anyhow::Result<archive::Format> {
    Ok(match path {
        Some(path) => match path.extension().and_then(|ext| ext.to_str()) {
            None => bail!("Cannot derive archive format from a file without extension"),
            Some("tar") => archive::Format::Tar,
            Some("stream") => archive::Format::InternalTransientNonPersistable,
            Some(ext) => bail!("Format for extension '{ext}' is unsupported"),
        },
        None => archive::Format::InternalTransientNonPersistable,
    })
}
2 changes: 2 additions & 0 deletions gitoxide-core/src/repository/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ pub fn init(directory: Option<PathBuf>) -> Result<gix::discover::repository::Pat
.with_context(|| "Repository initialization failed")
}

#[cfg(feature = "archive")]
pub mod archive;
pub mod commit;
pub mod config;
mod credential;
Expand Down
27 changes: 27 additions & 0 deletions src/plumbing/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,33 @@ pub fn main() -> Result<()> {
})?;

match cmd {
#[cfg(feature = "gitoxide-core-tools-archive")]
Subcommands::Archive(crate::plumbing::options::archive::Platform {
format,
output_file,
treeish,
}) => prepare_and_run(
"archive",
trace,
auto_verbose,
progress,
progress_keep_open,
None,
move |progress, _out, _err| {
core::repository::archive::stream(
repository(Mode::Lenient)?,
output_file.as_deref(),
treeish.as_deref(),
progress,
format.map(|f| match f {
crate::plumbing::options::archive::Format::Internal => {
gix::worktree::archive::Format::InternalTransientNonPersistable
}
crate::plumbing::options::archive::Format::Tar => gix::worktree::archive::Format::Tar,
}),
)
},
),
#[cfg(feature = "gitoxide-core-tools-corpus")]
Subcommands::Corpus(crate::plumbing::options::corpus::Platform { db, path, cmd }) => {
let reverse_trace_lines = progress;
Expand Down
34 changes: 34 additions & 0 deletions src/plumbing/options/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ pub struct Args {

#[derive(Debug, clap::Subcommand)]
pub enum Subcommands {
/// Subcommands for creating worktree archives
#[cfg(feature = "gitoxide-core-tools-archive")]
Archive(archive::Platform),
/// Subcommands for interacting with commit-graphs
#[clap(subcommand)]
CommitGraph(commitgraph::Subcommands),
Expand Down Expand Up @@ -129,6 +132,37 @@ pub enum Subcommands {
Free(free::Subcommands),
}

#[cfg(feature = "gitoxide-core-tools-archive")]
pub mod archive {
use std::path::PathBuf;

/// The archive formats that can be selected explicitly with `--format`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)]
pub enum Format {
/// An internal format that is for debugging, it should not be persisted and cannot be read back.
///
/// However, it represents the bare data stream with minimal overhead, and is a good
/// metric for throughput.
Internal,
/// Use the `.tar` file format, uncompressed.
Tar,
}

// The arguments of the `archive` sub-command.
// NOTE(review): kept as a plain comment, as a doc comment here would presumably feed into
// the generated help text - confirm before converting it.
#[derive(Debug, clap::Parser)]
pub struct Platform {
/// Explicitly set the archive format instead of deriving it from the extension of the output file.
#[clap(long, short = 'f', value_enum)]
pub format: Option<Format>,
/// The file to write the archive to. If unset, the output is discarded immediately.
///
/// Its extension determines the archive format, unless `--format` is set.
pub output_file: Option<PathBuf>,

/// The revspec of the commit or tree to traverse, or the tree at `HEAD` if unspecified.
///
/// If commit, the commit timestamp will be used as timestamp for each file in the archive.
pub treeish: Option<String>,
}
}

#[cfg(feature = "gitoxide-core-tools-corpus")]
pub mod corpus {
use std::path::PathBuf;
Expand Down

0 comments on commit 32bbb8b

Please sign in to comment.