-
-
Notifications
You must be signed in to change notification settings - Fork 313
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a simple connectivity check in a new `gix-fsck` crate, and add this to `gix` via a new `fsck` subcommand. Currently this is functionally equivalent to: `git rev-list --objects --quiet --missing=print` feat: add basic connectivity check - address review feedback
- Loading branch information
Showing
18 changed files
with
378 additions
and
2 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -281,6 +281,7 @@ members = [ | |
"gix-archive", | ||
"gix-worktree-stream", | ||
"gix-revwalk", | ||
"gix-fsck", | ||
|
||
"tests/tools", | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
use std::io::{BufWriter, Write}; | ||
|
||
use anyhow::Context; | ||
use gix::{objs::Kind, ObjectId}; | ||
|
||
pub fn connectivity(mut repo: gix::Repository, spec: Option<String>, out: impl std::io::Write) -> anyhow::Result<()> { | ||
let mut out = BufWriter::with_capacity(64 * 1024, out); | ||
let spec = spec.unwrap_or("HEAD".into()); | ||
|
||
repo.object_cache_size_if_unset(4 * 1024 * 1024); | ||
// We expect to be finding a bunch of non-existent objects here - never refresh the ODB | ||
repo.objects.refresh_never(); | ||
|
||
let id = repo | ||
.rev_parse_single(spec.as_str()) | ||
.context("Only single revisions are supported")?; | ||
let commits: gix::revision::Walk<'_> = id | ||
.object()? | ||
.peel_to_kind(gix::object::Kind::Commit) | ||
.context("Need commitish as starting point")? | ||
.id() | ||
.ancestors() | ||
.all()?; | ||
|
||
let missing_cb = |oid: &ObjectId, kind: Kind| { | ||
writeln!(out, "{oid}: {kind}").expect("failed to write output"); | ||
}; | ||
let mut conn = gix_fsck::ConnectivityCheck::new(&repo.objects, missing_cb); | ||
|
||
// Walk all commits, checking each one for connectivity | ||
for commit in commits { | ||
let commit = commit?; | ||
conn.check_commit(&commit.id); | ||
for parent in commit.parent_ids { | ||
conn.check_commit(&parent); | ||
} | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
[package] | ||
name = "gix-fsck" | ||
version = "0.0.0" | ||
repository = "https://github.com/Byron/gitoxide" | ||
authors = ["Cameron Esfahani <[email protected]>", "Sebastian Thiel <[email protected]>"] | ||
license = "MIT OR Apache-2.0" | ||
description = "Verifies the connectivity and validity of objects in the database" | ||
edition = "2021" | ||
include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] | ||
rust-version = "1.65" | ||
|
||
[lib] | ||
doctest = false | ||
|
||
[dependencies] | ||
gix-hash = { version = "^0.13.1", path = "../gix-hash" } | ||
gix-hashtable = { version = "^0.4.0", path = "../gix-hashtable" } | ||
gix-object = { version = "^0.38.0", path = "../gix-object" } | ||
|
||
[dev-dependencies] | ||
gix-odb = { path = "../gix-odb" } | ||
gix-testtools = { path = "../tests/tools"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../LICENSE-APACHE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../LICENSE-MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
//! A library for performing object database integrity and connectivity checks | ||
#![deny(rust_2018_idioms)] | ||
|
||
use gix_hash::ObjectId; | ||
use gix_hashtable::HashSet; | ||
use gix_object::{tree::EntryMode, Exists, FindExt, Kind}; | ||
|
||
pub struct ConnectivityCheck<'a, T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// ODB handle to use for the check | ||
db: &'a T, | ||
/// Closure to invoke when a missing object is encountered | ||
missing_cb: F, | ||
/// Set of Object IDs already (or about to be) scanned during the check | ||
oid_set: HashSet, | ||
/// Single buffer for decoding objects from the ODB | ||
/// This is slightly faster than allocating throughout the connectivity check (and reduces the memory requirements) | ||
buf: Vec<u8>, | ||
} | ||
|
||
impl<'a, T, F> ConnectivityCheck<'a, T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// Instantiate a connectivity check | ||
pub fn new(db: &'a T, missing_cb: F) -> ConnectivityCheck<'a, T, F> { | ||
ConnectivityCheck { | ||
db, | ||
missing_cb, | ||
oid_set: HashSet::default(), | ||
buf: Vec::new(), | ||
} | ||
} | ||
|
||
/// Run the connectivity check on the provided commit object ID | ||
/// - This will walk the trees and blobs referenced by the commit and verify they exist in the ODB | ||
/// - Any objects previously encountered by this [`ConnectivityCheck`] instance will be skipped | ||
/// - Any referenced blobs that are not present in the ODB will result in a call to the `missing_cb` | ||
/// - Missing commits or trees will currently result in panic | ||
/// - TODO: consider how to handle a missing commit (invoke `missing_cb`, or possibly return a Result?) | ||
pub fn check_commit(&mut self, oid: &ObjectId) { | ||
// Attempt to insert the commit ID in the set, and if already present, return immediately | ||
if !self.oid_set.insert(*oid) { | ||
return; | ||
} | ||
// Obtain the commit's tree ID | ||
let tree_id = { | ||
let commit = self.db.find_commit(oid, &mut self.buf).expect("failed to find commit"); | ||
commit.tree() | ||
}; | ||
|
||
// Attempt to insert the tree ID in the set, and if already present, return immediately | ||
if self.oid_set.insert(tree_id) { | ||
self.check_tree(&tree_id); | ||
} | ||
} | ||
|
||
fn check_tree(&mut self, oid: &ObjectId) { | ||
let tree = match self.db.find_tree(oid, &mut self.buf) { | ||
Ok(tree) => tree, | ||
Err(_) => { | ||
// Tree is missing, so invoke `missing_cb` | ||
(self.missing_cb)(oid, Kind::Tree); | ||
return; | ||
} | ||
}; | ||
|
||
// Keeping separate sets for trees and blobs for now... | ||
// This is about a wash when compared to using a HashMap<ObjectID, Kind> | ||
struct TreeEntries { | ||
trees: HashSet<ObjectId>, | ||
blobs: HashSet<ObjectId>, | ||
} | ||
|
||
// Build up a set of trees and a set of blobs | ||
let entries: TreeEntries = { | ||
let mut entries = TreeEntries { | ||
trees: HashSet::default(), | ||
blobs: HashSet::default(), | ||
}; | ||
|
||
// For each entry in the tree | ||
for entry_ref in tree.entries.iter() { | ||
match entry_ref.mode { | ||
EntryMode::Tree => { | ||
let tree_id = entry_ref.oid.to_owned(); | ||
// Check if the tree has already been encountered | ||
if self.oid_set.insert(tree_id) { | ||
entries.trees.insert(tree_id); | ||
} | ||
} | ||
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { | ||
let blob_id = entry_ref.oid.to_owned(); | ||
// Check if the blob has already been encountered | ||
if self.oid_set.insert(blob_id) { | ||
entries.blobs.insert(blob_id); | ||
} | ||
} | ||
EntryMode::Commit => { | ||
// This implies a submodule (OID is the commit hash of the submodule) | ||
// Skip it as it's not in this repository! | ||
} | ||
} | ||
} | ||
entries | ||
}; | ||
|
||
for tree_id in entries.trees.iter() { | ||
self.check_tree(tree_id); | ||
} | ||
for blob_id in entries.blobs.iter() { | ||
self.check_blob(blob_id); | ||
} | ||
} | ||
|
||
fn check_blob(&mut self, oid: &ObjectId) { | ||
// Check if the blob is missing from the ODB | ||
if !self.db.exists(oid) { | ||
// Blob is missing, so invoke `missing_cb` | ||
(self.missing_cb)(oid, Kind::Blob); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
use std::ops::Deref; | ||
|
||
use gix_fsck::ConnectivityCheck; | ||
use gix_hash::ObjectId; | ||
use gix_hashtable::HashMap; | ||
use gix_object::Kind; | ||
use gix_testtools::once_cell::sync::Lazy; | ||
|
||
use crate::hex_to_id; | ||
|
||
fn check_missing<'a>(repo_name: &str, commits: impl IntoIterator<Item = &'a ObjectId>) -> HashMap<ObjectId, Kind> { | ||
let db = { | ||
let fixture_path = gix_testtools::scripted_fixture_read_only("make_test_repos.sh") | ||
.expect("fixture path") | ||
.join(repo_name) | ||
.join(".git") | ||
.join("objects"); | ||
let mut db = gix_odb::at(fixture_path).expect("odb handle"); | ||
db.refresh_never(); | ||
db | ||
}; | ||
|
||
let mut missing: HashMap<ObjectId, Kind> = HashMap::default(); | ||
let callback = |oid: &ObjectId, kind: Kind| { | ||
missing.try_insert(*oid, kind).expect("no duplicate oid"); | ||
}; | ||
|
||
let mut check = ConnectivityCheck::new(&db, callback); | ||
for commit in commits.into_iter() { | ||
check.check_commit(commit); | ||
} | ||
|
||
missing | ||
} | ||
|
||
fn hex_to_ids<'a>(hex_ids: impl IntoIterator<Item = &'a str>) -> Vec<ObjectId> { | ||
hex_ids.into_iter().map(hex_to_id).collect() | ||
} | ||
|
||
fn hex_to_objects<'a>(hex_ids: impl IntoIterator<Item = &'a str>, kind: Kind) -> HashMap<ObjectId, Kind> { | ||
hex_to_ids(hex_ids) | ||
.into_iter() | ||
.map(|id| (id, kind)) | ||
.collect::<HashMap<_, _>>() | ||
} | ||
|
||
// Get a `&Vec<ObjectID` for each commit in the test fixture repository | ||
fn all_commits<'a>() -> &'a Vec<ObjectId> { | ||
static ALL_COMMITS: Lazy<Vec<ObjectId>> = Lazy::new(|| { | ||
hex_to_ids(vec![ | ||
"5d18db2e2aabadf7b914435ef34f2faf8b4546dd", | ||
"3a3dfaa55a515f3fb3a25751107bbb523af6a1b0", | ||
"734c926856a328d1168ffd7088532e0d1ad19bbe", | ||
]) | ||
}); | ||
ALL_COMMITS.deref() | ||
} | ||
|
||
#[test] | ||
fn no_missing() { | ||
// The "base" repo is the original, and has every object present | ||
assert_eq!(check_missing("base", all_commits()), HashMap::default()); | ||
} | ||
|
||
#[test] | ||
fn missing_blobs() { | ||
// The "blobless" repo is cloned with `--filter=blob:none`, and is missing one blob | ||
let expected = hex_to_objects(vec!["c18147dc648481eeb65dc5e66628429a64843327"], Kind::Blob); | ||
assert_eq!(check_missing("blobless", all_commits()), expected); | ||
} | ||
|
||
#[test] | ||
fn missing_trees() { | ||
// The "treeless" repo is cloned with `--filter=tree:0`, and is missing two trees | ||
// NOTE: This repo is also missing a blob, but we have no way of knowing that, as the tree referencing it is missing | ||
let expected = hex_to_objects( | ||
vec![ | ||
"9561cfbae43c5e2accdfcd423378588dd10d827f", | ||
"fc264b3b6875a46e9031483aeb9994a1b897ffd3", | ||
], | ||
Kind::Tree, | ||
); | ||
assert_eq!(check_missing("treeless", all_commits()), expected); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
make_test_repos.tar.xz |
Oops, something went wrong.