-
-
Notifications
You must be signed in to change notification settings - Fork 313
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- update `gix-fsck` crate status with all obvious features of `git-fsck` - clarify that fsck is only partial just to be sure people won't use it in place of `git fsck` - if BufWriter's are truly needed, let's expect it as type to avoid double-buffering unnecessarily. - consistent naming of the changelog markdown fail (all caps) - set version to 0.1.0 for fsck crate as 0.0 is used only for name-keeping when there is no functionality. - general cleanup - avoid allocating for each tree (but it's still recursive)
- Loading branch information
Showing
12 changed files
with
156 additions
and
124 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
[package] | ||
name = "gix-fsck" | ||
version = "0.0.0" | ||
version = "0.1.0" | ||
repository = "https://github.com/Byron/gitoxide" | ||
authors = ["Cameron Esfahani <[email protected]>", "Sebastian Thiel <[email protected]>"] | ||
license = "MIT OR Apache-2.0" | ||
description = "Verifies the connectivity and validity of objects in the database" | ||
edition = "2021" | ||
include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] | ||
include = ["src/**/*", "LICENSE-*"] | ||
rust-version = "1.65" | ||
|
||
[lib] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,127 +1,161 @@ | ||
//! A library for performing object database integrity and connectivity checks | ||
#![deny(rust_2018_idioms)] | ||
#![deny(rust_2018_idioms, unsafe_code, missing_docs)] | ||
|
||
use gix_hash::ObjectId; | ||
use gix_hashtable::HashSet; | ||
use gix_object::{tree::EntryMode, Exists, FindExt, Kind}; | ||
use std::cell::RefCell; | ||
use std::ops::{Deref, DerefMut}; | ||
|
||
pub struct ConnectivityCheck<'a, T, F> | ||
/// Perform a connectivity check. | ||
pub struct Connectivity<T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// ODB handle to use for the check | ||
db: &'a T, | ||
db: T, | ||
/// Closure to invoke when a missing object is encountered | ||
missing_cb: F, | ||
/// Set of Object IDs already (or about to be) scanned during the check | ||
oid_set: HashSet, | ||
/// Single buffer for decoding objects from the ODB | ||
/// This is slightly faster than allocating throughout the connectivity check (and reduces the memory requirements) | ||
buf: Vec<u8>, | ||
/// A free-list of buffers for recursive tree decoding. | ||
free_list: FreeList, | ||
} | ||
|
||
impl<'a, T, F> ConnectivityCheck<'a, T, F> | ||
impl<T, F> Connectivity<T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// Instantiate a connectivity check | ||
pub fn new(db: &'a T, missing_cb: F) -> ConnectivityCheck<'a, T, F> { | ||
ConnectivityCheck { | ||
/// Instantiate a connectivity check. | ||
pub fn new(db: T, missing_cb: F) -> Connectivity<T, F> { | ||
Connectivity { | ||
db, | ||
missing_cb, | ||
oid_set: HashSet::default(), | ||
buf: Vec::new(), | ||
free_list: Default::default(), | ||
} | ||
} | ||
|
||
/// Run the connectivity check on the provided commit object ID | ||
/// - This will walk the trees and blobs referenced by the commit and verify they exist in the ODB | ||
/// - Any objects previously encountered by this [`ConnectivityCheck`] instance will be skipped | ||
/// - Any referenced blobs that are not present in the ODB will result in a call to the `missing_cb` | ||
/// - Missing commits or trees will currently result in panic | ||
/// Run the connectivity check on the provided commit `oid`. | ||
/// | ||
/// ### Algorithm | ||
/// | ||
/// Walk the trees and blobs referenced by the commit and verify they exist in the ODB. | ||
/// Any objects previously encountered by this instance will be skipped silently. | ||
/// Any referenced blobs that are not present in the ODB will result in a call to the `missing_cb`. | ||
/// Missing commits or trees will cause an error to be returned. | ||
/// - TODO: consider how to handle a missing commit (invoke `missing_cb`, or possibly return a Result?) | ||
pub fn check_commit(&mut self, oid: &ObjectId) { | ||
pub fn check_commit(&mut self, oid: &ObjectId) -> Result<(), gix_object::find::existing_object::Error> { | ||
// Attempt to insert the commit ID in the set, and if already present, return immediately | ||
if !self.oid_set.insert(*oid) { | ||
return; | ||
return Ok(()); | ||
} | ||
// Obtain the commit's tree ID | ||
let tree_id = { | ||
let commit = self.db.find_commit(oid, &mut self.buf).expect("failed to find commit"); | ||
let mut buf = self.free_list.buf(); | ||
let commit = self.db.find_commit(oid, &mut buf)?; | ||
commit.tree() | ||
}; | ||
|
||
// Attempt to insert the tree ID in the set, and if already present, return immediately | ||
if self.oid_set.insert(tree_id) { | ||
self.check_tree(&tree_id); | ||
check_tree( | ||
&tree_id, | ||
&self.db, | ||
&mut self.free_list, | ||
&mut self.missing_cb, | ||
&mut self.oid_set, | ||
); | ||
} | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
fn check_tree(&mut self, oid: &ObjectId) { | ||
let tree = match self.db.find_tree(oid, &mut self.buf) { | ||
Ok(tree) => tree, | ||
Err(_) => { | ||
// Tree is missing, so invoke `missing_cb` | ||
(self.missing_cb)(oid, Kind::Tree); | ||
return; | ||
} | ||
}; | ||
#[derive(Default)] | ||
struct FreeList(RefCell<Vec<Vec<u8>>>); | ||
|
||
// Keeping separate sets for trees and blobs for now... | ||
// This is about a wash when compared to using a HashMap<ObjectID, Kind> | ||
struct TreeEntries { | ||
trees: HashSet<ObjectId>, | ||
blobs: HashSet<ObjectId>, | ||
} | ||
impl FreeList { | ||
fn buf(&self) -> ReturnToFreeListOnDrop<'_> { | ||
let buf = self.0.borrow_mut().pop().unwrap_or_default(); | ||
ReturnToFreeListOnDrop { buf, list: &self.0 } | ||
} | ||
} | ||
|
||
// Build up a set of trees and a set of blobs | ||
let entries: TreeEntries = { | ||
let mut entries = TreeEntries { | ||
trees: HashSet::default(), | ||
blobs: HashSet::default(), | ||
}; | ||
|
||
// For each entry in the tree | ||
for entry_ref in tree.entries.iter() { | ||
match entry_ref.mode { | ||
EntryMode::Tree => { | ||
let tree_id = entry_ref.oid.to_owned(); | ||
// Check if the tree has already been encountered | ||
if self.oid_set.insert(tree_id) { | ||
entries.trees.insert(tree_id); | ||
} | ||
} | ||
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { | ||
let blob_id = entry_ref.oid.to_owned(); | ||
// Check if the blob has already been encountered | ||
if self.oid_set.insert(blob_id) { | ||
entries.blobs.insert(blob_id); | ||
} | ||
} | ||
EntryMode::Commit => { | ||
// This implies a submodule (OID is the commit hash of the submodule) | ||
// Skip it as it's not in this repository! | ||
} | ||
} | ||
} | ||
entries | ||
}; | ||
struct ReturnToFreeListOnDrop<'a> { | ||
list: &'a RefCell<Vec<Vec<u8>>>, | ||
buf: Vec<u8>, | ||
} | ||
|
||
for tree_id in entries.trees.iter() { | ||
self.check_tree(tree_id); | ||
} | ||
for blob_id in entries.blobs.iter() { | ||
self.check_blob(blob_id); | ||
impl Drop for ReturnToFreeListOnDrop<'_> { | ||
fn drop(&mut self) { | ||
if !self.buf.is_empty() { | ||
self.list.borrow_mut().push(std::mem::take(&mut self.buf)); | ||
} | ||
} | ||
} | ||
|
||
impl Deref for ReturnToFreeListOnDrop<'_> { | ||
type Target = Vec<u8>; | ||
|
||
fn deref(&self) -> &Self::Target { | ||
&self.buf | ||
} | ||
} | ||
|
||
impl DerefMut for ReturnToFreeListOnDrop<'_> { | ||
fn deref_mut(&mut self) -> &mut Self::Target { | ||
&mut self.buf | ||
} | ||
} | ||
|
||
fn check_blob<F>(db: impl Exists, oid: &ObjectId, mut missing_cb: F) | ||
where | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
// Check if the blob is missing from the ODB | ||
if !db.exists(oid) { | ||
// Blob is missing, so invoke `missing_cb` | ||
missing_cb(oid, Kind::Blob); | ||
} | ||
} | ||
|
||
fn check_blob(&mut self, oid: &ObjectId) { | ||
// Check if the blob is missing from the ODB | ||
if !self.db.exists(oid) { | ||
// Blob is missing, so invoke `missing_cb` | ||
(self.missing_cb)(oid, Kind::Blob); | ||
fn check_tree<F>( | ||
oid: &ObjectId, | ||
db: &(impl FindExt + Exists), | ||
list: &FreeList, | ||
missing_cb: &mut F, | ||
oid_set: &mut HashSet, | ||
) where | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
let mut buf = list.buf(); | ||
let Ok(tree) = db.find_tree(oid, &mut buf) else { | ||
missing_cb(oid, Kind::Tree); | ||
return; | ||
}; | ||
|
||
// Build up a set of trees and a set of blobs | ||
// For each entry in the tree | ||
for entry_ref in tree.entries.iter() { | ||
match entry_ref.mode { | ||
EntryMode::Tree => { | ||
let tree_id = entry_ref.oid.to_owned(); | ||
if oid_set.insert(tree_id) { | ||
check_tree(&tree_id, &*db, list, &mut *missing_cb, oid_set); | ||
} | ||
} | ||
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { | ||
let blob_id = entry_ref.oid.to_owned(); | ||
if oid_set.insert(blob_id) { | ||
check_blob(&*db, &blob_id, &mut *missing_cb); | ||
} | ||
} | ||
EntryMode::Commit => { | ||
// This implies a submodule (OID is the commit hash of the submodule) | ||
// Skip it as it's not in this repository! | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.