Skip to content

Commit

Permalink
Add internal-tools git-to-sh to serialize a git repo partially to a…
Browse files Browse the repository at this point in the history
… shell script.

The shell-script will reproduce the repository, as long as the history is linear.
  • Loading branch information
Byron committed Aug 20, 2024
1 parent ba91274 commit d54d99c
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 100 deletions.
2 changes: 1 addition & 1 deletion tests/it/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ path = "src/main.rs"
clap = { version = "4.5.16", features = ["derive"] }
anyhow = "1.0.86"

gix = { version = "0.64.0", path = "../../gix", default-features = false, features = ["attributes"] }
gix = { version = "0.64.0", path = "../../gix", default-features = false, features = ["attributes", "revision"] }

31 changes: 31 additions & 0 deletions tests/it/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,37 @@ pub enum Subcommands {
#[clap(value_parser = AsPathSpec)]
patterns: Vec<gix::pathspec::Pattern>,
},
/// Serialize a git repository as linear history while degenerating content into a shell script that reproduces it.
#[clap(visible_alias = "gts")]
GitToSh {
/// The amount of commits to copy from `committish`.
///
/// If 0, all traversable commits will be copied.
#[clap(long, short = 'c', default_value_t = 0)]
count: usize,
/// Do not use `copy-royal` to degenerate information of blobs, but take blobs verbatim.
///
/// Note that this should only be done if the source repository is purely for testing
/// or was created by yourself.
#[clap(long)]
verbatim: bool,
/// The directory into which the blobs and tree declarations will be written.
#[clap(long, short = 'o', default_value = ".")]
output_dir: PathBuf,
/// The path to the git repository to serialize.
repo_dir: PathBuf,
/// The name of the directory within `output_dir` for storing blobs and trees.
name: String,
/// A revspec of the commit to start the iteration from, like `@`.
///
/// Note that the history will be serialized, and multiple parents aren't allowed.
committish: String,
/// The pathspecs to determine which paths to copy from each commit's tree.
///
/// None will copy everything.
#[clap(value_parser = AsPathSpec)]
patterns: Vec<gix::pathspec::Pattern>,
},
}

#[derive(Clone)]
Expand Down
197 changes: 100 additions & 97 deletions tests/it/src/commands/copy_royal.rs
Original file line number Diff line number Diff line change
@@ -1,111 +1,114 @@
use anyhow::Context;
use gix::fs::Stack;
use gix::pathspec::Pattern;
use std::path::{Path, PathBuf};
pub(super) mod function {
use anyhow::Context;
use gix::fs::Stack;
use gix::pathspec::Pattern;
use std::path::{Path, PathBuf};

pub fn doit(
dry_run: bool,
worktree_dir: &Path,
destination_dir: PathBuf,
patterns: Vec<Pattern>,
) -> anyhow::Result<()> {
let prefix = if dry_run { "WOULD" } else { "Will" };
let repo = gix::open(worktree_dir)?;
let index = repo.index()?;
let mut specs = repo.pathspec(
true,
// TODO: ideally this could accept patterns already.
patterns.into_iter().map(|p| p.to_bstring()),
true,
&index,
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping,
)?;
let mut create_dir = CreateDir { dry_run };
let mut stack = gix::fs::Stack::new(destination_dir);
for (rela_path, _entry) in specs
.index_entries_with_paths(&index)
.context("Didn't find a single entry to copy")?
{
let rela_path = gix::path::from_bstr(rela_path);
let src = worktree_dir.join(&rela_path);
stack.make_relative_path_current(&rela_path, &mut create_dir)?;
let dst = stack.current();
pub fn copy_royal(
dry_run: bool,
worktree_dir: &Path,
destination_dir: PathBuf,
patterns: Vec<Pattern>,
) -> anyhow::Result<()> {
let prefix = if dry_run { "WOULD" } else { "Will" };
let repo = gix::open(worktree_dir)?;
let index = repo.index()?;
let mut specs = repo.pathspec(
true,
// TODO: ideally this could accept patterns already.
patterns.into_iter().map(|p| p.to_bstring()),
true,
&index,
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping,
)?;
let mut create_dir = CreateDir { dry_run };
let mut stack = gix::fs::Stack::new(destination_dir);
for (rela_path, _entry) in specs
.index_entries_with_paths(&index)
.context("Didn't find a single entry to copy")?
{
let rela_path = gix::path::from_bstr(rela_path);
let src = worktree_dir.join(&rela_path);
stack.make_relative_path_current(&rela_path, &mut create_dir)?;
let dst = stack.current();

eprintln!(
"{prefix} copy '{src}' to '{dst}'",
src = src.display(),
dst = dst.display()
);
if !dry_run {
let content = std::fs::read_to_string(&src).with_context(|| {
format!(
"Need UTF-8 decodable content in '{src}' - skip binaries with pathspec",
src = src.display()
)
})?;
std::fs::write(dst, remapped(content))?
eprintln!(
"{prefix} copy '{src}' to '{dst}'",
src = src.display(),
dst = dst.display()
);
if !dry_run {
let content = std::fs::read_to_string(&src).with_context(|| {
format!(
"Need UTF-8 decodable content in '{src}' - skip binaries with pathspec",
src = src.display()
)
})?;
std::fs::write(dst, remapped(&content))?
}
}
Ok(())
}
Ok(())
}

fn remapped(i: String) -> String {
i.chars()
.filter_map(|c| {
Some(if c.is_alphabetic() {
if c.is_uppercase() {
match (c as u32) % 10 {
0 => 'A',
1 => 'E',
2 => 'I',
3 => 'O',
4 => 'U',
5 => 'X',
6 => 'R',
7 => 'S',
8 => 'T',
9 => 'Y',
_ => unreachable!(),
pub fn remapped(i: &str) -> String {
i.chars()
.filter_map(|c| {
Some(if c.is_alphabetic() {
if c.is_uppercase() {
match (c as u32) % 10 {
0 => 'A',
1 => 'E',
2 => 'I',
3 => 'O',
4 => 'U',
5 => 'X',
6 => 'R',
7 => 'S',
8 => 'T',
9 => 'Y',
_ => unreachable!(),
}
} else {
match (c as u32) % 10 {
0 => 'a',
1 => 'e',
2 => 'i',
3 => 'o',
4 => 'u',
5 => 'x',
6 => 'r',
7 => 's',
8 => 't',
9 => 'y',
_ => unreachable!(),
}
}
} else if c.is_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit() {
c
} else {
match (c as u32) % 10 {
0 => 'a',
1 => 'e',
2 => 'i',
3 => 'o',
4 => 'u',
5 => 'x',
6 => 'r',
7 => 's',
8 => 't',
9 => 'y',
_ => unreachable!(),
}
}
} else if c.is_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit() {
c
} else {
return None;
return None;
})
})
})
.collect()
}
.collect()
}

struct CreateDir {
dry_run: bool,
}
struct CreateDir {
dry_run: bool,
}

impl gix::fs::stack::Delegate for CreateDir {
fn push_directory(&mut self, stack: &Stack) -> std::io::Result<()> {
if !self.dry_run && !stack.current().is_dir() {
std::fs::create_dir(stack.current())?;
impl gix::fs::stack::Delegate for CreateDir {
fn push_directory(&mut self, stack: &Stack) -> std::io::Result<()> {
if !self.dry_run && !stack.current().is_dir() {
std::fs::create_dir(stack.current())?;
}
Ok(())
}
Ok(())
}

fn push(&mut self, _is_last_component: bool, _stack: &Stack) -> std::io::Result<()> {
Ok(())
}
fn push(&mut self, _is_last_component: bool, _stack: &Stack) -> std::io::Result<()> {
Ok(())
}

fn pop_directory(&mut self) {}
fn pop_directory(&mut self) {}
}
}
pub use function::remapped;
137 changes: 137 additions & 0 deletions tests/it/src/commands/git_to_sh.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/// Optional settings for `git_to_sh()` that control what is copied and how.
pub struct Options {
/// Pathspec patterns handed to the repository's pathspec search to select which
/// index entries of each commit's tree are serialized.
pub patterns: Vec<gix::pathspec::Pattern>,
/// If `true`, blob contents are written out unchanged. Otherwise blob contents must be valid UTF-8
/// and are passed through `copy_royal::remapped()` before being written and hashed.
pub verbatim: bool,
/// Upper bound for the number of commits to serialize; it is compared against a running
/// count after each serialized commit.
// NOTE(review): the CLI documents `0` as "all traversable commits" - confirm the traversal honours that.
pub max_count: usize,
}

pub(super) mod function {
use anyhow::{bail, Context};
use gix::object::tree::EntryKind;
use gix::objs::FindExt;
use std::borrow::Cow;
use std::path::Path;

use super::Options;

/// Serialize the first-parent history reachable from `committish` in the repository at `repo_dir`
/// and write a shell script to `out` that replays it in another repository.
///
/// For every visited commit the following files are created in `output_dir/name`:
///
/// * `<commit-id>.tree` - the commit's tree in a line format fed to `git update-index --index-info`
/// * `<commit-id>.msg` - the raw commit message
/// * `<blob-id>.blob` - one file per blob referenced by the tree file
///
/// The script written to `out` first stores all `.blob` files with `git hash-object`, then
/// recreates the commits oldest-first by resetting the index, loading the `.tree` file and committing
/// with the stored message.
///
/// `max_count` limits the number of commits that are serialized, starting at `committish`.
/// A value of `0` means "no limit", as documented for the `--count` flag.
///
/// # Errors
///
/// Fails if the repository can't be opened, `committish` doesn't resolve to a commit, the traversal
/// or any object lookup fails, a tree can't be serialized, or if writing to the file system or to
/// `out` fails.
pub fn git_to_sh(
    output_dir: &Path,
    repo_dir: &Path,
    name: &str,
    committish: &str,
    mut out: impl std::io::Write,
    Options {
        patterns,
        verbatim,
        max_count,
    }: Options,
) -> anyhow::Result<()> {
    let repo = gix::open(repo_dir)?;
    let commit = repo.rev_parse_single(committish)?.object()?.try_into_commit()?;

    let assets = output_dir.join(name);
    std::fs::create_dir_all(&assets)?;

    // Collected newest-first, each with its raw message; replayed in reverse order below.
    let mut commits = Vec::new();
    // Reused for each commit's tree listing to avoid reallocating.
    let mut tree_buf = Vec::new();
    for entry in commit.id().ancestors().first_parent_only().all()? {
        let entry = entry?;

        let commit = entry.id().object()?.into_commit();
        commits.push((commit.id, commit.message_raw_sloppy().to_owned()));
        let index = repo.index_from_tree(&commit.tree_id()?)?;

        tree_buf.clear();
        write_tree_as_update_index_format(&repo, &index, &mut tree_buf, &assets, verbatim, patterns.clone())?;

        let tree_file = assets.join(format!("{}.tree", commit.id));
        std::fs::write(tree_file, &tree_buf)?;

        // `0` means unlimited. Without the explicit check the comparison would always be true
        // and the traversal would stop after the very first commit.
        if max_count != 0 && commits.len() >= max_count {
            break;
        }
    }

    writeln!(
        &mut out,
        "# The following is to be executed in the receiving git repository"
    )?;
    writeln!(&mut out, "ROOT=to-be-specified-by-user")?;
    writeln!(&mut out, "index=.git/index")?;
    writeln!(&mut out, "git hash-object -w -t blob -- $ROOT/{name}/*.blob")?;
    // Oldest commit first, so the recreated history has the same order as the source.
    for (commit_id, commit_msg) in commits.iter().rev() {
        writeln!(&mut out, "rm \"$index\"")?;
        writeln!(
            &mut out,
            "git update-index --index-info < \"$ROOT/{name}/{commit_id}.tree\""
        )?;
        let commit_msg_file = assets.join(format!("{commit_id}.msg"));
        std::fs::write(commit_msg_file, commit_msg)?;
        writeln!(&mut out, "git commit --allow-empty -F \"$ROOT/{name}/{commit_id}.msg\"")?;
    }

    Ok(())
}

/// Write all entries of `index` that match `patterns` to `out`, one line per entry in the form
/// `<octal mode> <blob-id>\t<path>`, and store the content of each listed blob
/// as `<blob-id>.blob` in `output_dir`.
///
/// * Regular and executable blobs are stored unchanged if `verbatim` is `true`. Otherwise their
///   content must be valid UTF-8 and is replaced by the output of `copy_royal::remapped()`, with the
///   blob id recomputed from the remapped content.
/// * Symbolic links are always stored unchanged.
/// * Entries of kind `Commit` (submodules) are skipped entirely.
///
/// If the pathspec search yields no entries, nothing is written and no error is raised.
///
/// # Errors
///
/// Fails if a path contains a newline (the output format is newline-separated), if an entry mode
/// can't be interpreted, if an object can't be found, if a blob to be remapped isn't valid UTF-8,
/// or if writing a blob file or writing to `out` fails.
fn write_tree_as_update_index_format(
    repo: &gix::Repository,
    index: &gix::index::State,
    out: &mut dyn std::io::Write,
    output_dir: &Path,
    verbatim: bool,
    patterns: Vec<gix::pathspec::Pattern>,
) -> anyhow::Result<()> {
    // Reused for every object lookup to avoid reallocating per entry.
    let mut blob_buf = Vec::new();
    let mut specs = repo.pathspec(
        true,
        // TODO: ideally this could accept patterns already.
        // `patterns` is owned and not needed afterwards, so it is consumed without cloning.
        patterns.into_iter().map(|p| p.to_bstring()),
        true,
        index,
        gix::worktree::stack::state::attributes::Source::IdMapping,
    )?;

    for (rela_path, entry) in specs.index_entries_with_paths(index).into_iter().flatten() {
        if rela_path.contains(&b'\n') {
            bail!("Entry at '{rela_path}' contained a newline, which currently can't be encoded. Preferred newlines over NULL separation.")
        }

        let (blob_id, blob_data) = match entry.mode.to_tree_entry_mode() {
            None => {
                bail!("Couldn't interpret mode of tree entry at '{rela_path}'")
            }
            Some(mode) => match mode.kind() {
                EntryKind::Tree => {
                    unreachable!("Can't have trees in indices")
                }
                EntryKind::Blob | EntryKind::BlobExecutable => {
                    let obj = repo.objects.find(&entry.id, &mut blob_buf)?;
                    if verbatim {
                        (entry.id, Cow::Borrowed(&blob_buf))
                    } else {
                        let data = std::str::from_utf8(obj.data).with_context(|| {
                            format!("Entry at '{rela_path}' was not valid UTF8 and can't be remapped")
                        })?;
                        let mapped = crate::commands::copy_royal::remapped(data);
                        // The content changed, so the id must be recomputed to match what
                        // `git hash-object` will produce for the stored file.
                        (
                            gix::objs::compute_hash(repo.object_hash(), gix::object::Kind::Blob, mapped.as_bytes()),
                            Cow::Owned(mapped.into()),
                        )
                    }
                }
                EntryKind::Link => {
                    repo.objects.find(&entry.id, &mut blob_buf)?;
                    (entry.id, Cow::Borrowed(&blob_buf))
                }
                EntryKind::Commit => continue,
            },
        };
        let blob_path = output_dir.join(format!("{blob_id}.blob"));
        std::fs::write(blob_path, blob_data.as_ref())?;

        writeln!(out, "{mode:06o} {blob_id}\t{rela_path}", mode = entry.mode)?;
    }
    Ok(())
}
}
Loading

0 comments on commit d54d99c

Please sign in to comment.