-
-
Notifications
You must be signed in to change notification settings - Fork 313
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[commitgraph] Implement basic commit-graph file verification.
Missing features: 1. It operates on commit-graph files only, so it doesn't verify that commit-graph data matches `git-odb` data. 2. No progress reporting or parallelization. This shouldn't be needed until we need to check against `git-odb` data. Example output for Linux repo: ``` $ time ./target/release/gixp commit-graph-verify -s ~/src/linux/.git/objects/info number of commits with the given number of parents 0: 4 1: 878988 2: 67800 3: 652 4: 408 5: 382 6: 454 7: 95 8: 65 9: 47 10: 25 11: 26 12: 14 13: 4 14: 3 18: 1 19: 1 20: 1 21: 1 24: 1 27: 1 30: 1 32: 1 66: 1 ->: 948976 longest path length between two commits: 160521 real 0m0.196s user 0m0.180s sys 0m0.016s ```
- Loading branch information
1 parent
701f33c
commit 2571113
Showing
19 changed files
with
459 additions
and
21 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,8 @@ | |
mod access; | ||
pub mod commit; | ||
mod init; | ||
pub mod verify; | ||
|
||
pub use init::Error; | ||
|
||
pub use commit::Commit; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
use crate::{ | ||
file::{self, File}, | ||
GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX, | ||
}; | ||
use bstr::ByteSlice; | ||
use git_object::{borrowed, owned, SHA1_SIZE}; | ||
use std::cmp::{max, min}; | ||
use std::collections::HashMap; | ||
use std::convert::TryFrom; | ||
use std::path::Path; | ||
|
||
#[derive(thiserror::Error, Debug)] | ||
pub enum Error { | ||
#[error(transparent)] | ||
Commit(#[from] file::commit::Error), | ||
#[error("commit at file position {pos} has invalid ID {id}")] | ||
CommitId { id: owned::Id, pos: file::Position }, | ||
#[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")] | ||
CommitsOutOfOrder { | ||
id: owned::Id, | ||
pos: file::Position, | ||
predecessor_id: owned::Id, | ||
}, | ||
#[error("commit-graph filename should be {0}")] | ||
Filename(String), | ||
#[error("commit {id} has invalid generation {generation}")] | ||
Generation { generation: u32, id: owned::Id }, | ||
#[error("checksum mismatch: expected {expected}, got {actual}")] | ||
Mismatch { expected: owned::Id, actual: owned::Id }, | ||
#[error("commit {id} has invalid root tree ID {root_tree_id}")] | ||
RootTreeId { id: owned::Id, root_tree_id: owned::Id }, | ||
} | ||
|
||
// This is a separate type to let `traverse`'s caller use the same error type for its result and its | ||
// processor error type while also letting that error type contain file::verify::Error values. | ||
// Is there a better way? Should the caller's error type just use boxes to avoid recursive type | ||
// errors? | ||
#[derive(thiserror::Error, Debug)] | ||
pub enum EitherError<E1: std::error::Error + 'static, E2: std::error::Error + 'static> { | ||
#[error(transparent)] | ||
Internal(#[from] E1), | ||
// Why can't I use #[from] here? Boo! | ||
#[error("{0}")] | ||
Processor(#[source] E2), | ||
} | ||
|
||
/// Statistics gathered while traversing the commits of one commit-graph file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde1", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// Highest generation number seen on any commit.
    pub max_generation: u32,
    /// Intended to be the largest number of parents found on a single commit.
    /// NOTE(review): confirm that the code producing this value actually fills it in.
    pub max_parents: u32,
    /// Lowest generation number seen on any commit, or 0 if none was recorded.
    pub min_generation: u32,
    /// Number of commits the file reports.
    pub num_commits: u32,
    /// Maps a parent count to the number of commits that have exactly that many parents.
    pub parent_counts: HashMap<u32, u32>,
}
|
||
impl File { | ||
pub fn checksum(&self) -> borrowed::Id<'_> { | ||
borrowed::Id::try_from(&self.data[self.data.len() - SHA1_SIZE..]).expect("file to be large enough for a hash") | ||
} | ||
|
||
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, EitherError<Error, E>> | ||
where | ||
E: std::error::Error + 'static, | ||
Processor: FnMut(&file::Commit<'a>) -> Result<(), E>, | ||
{ | ||
self.verify_checksum()?; | ||
verify_split_chain_filename_hash(&self.path, self.checksum())?; | ||
|
||
// This probably belongs in borrowed::Id itself? | ||
let null_id = borrowed::Id::from(&[0u8; SHA1_SIZE]); | ||
|
||
let mut stats = Outcome { | ||
max_generation: 0, | ||
max_parents: 0, | ||
min_generation: GENERATION_NUMBER_INFINITY, | ||
num_commits: self.num_commits(), | ||
parent_counts: HashMap::new(), | ||
}; | ||
|
||
// TODO: Verify self.fan values as we go. | ||
let mut prev_id: borrowed::Id<'a> = null_id; | ||
for commit in self.iter_commits() { | ||
if commit.id() <= prev_id { | ||
if commit.id() == null_id { | ||
return Err(Error::CommitId { | ||
pos: commit.position(), | ||
id: commit.id().into(), | ||
} | ||
.into()); | ||
} | ||
return Err(Error::CommitsOutOfOrder { | ||
pos: commit.position(), | ||
id: commit.id().into(), | ||
predecessor_id: prev_id.into(), | ||
} | ||
.into()); | ||
} | ||
if commit.root_tree_id() == null_id { | ||
return Err(Error::RootTreeId { | ||
id: commit.id().into(), | ||
root_tree_id: commit.root_tree_id().into(), | ||
} | ||
.into()); | ||
} | ||
if commit.generation() > GENERATION_NUMBER_MAX { | ||
return Err(Error::Generation { | ||
generation: commit.generation(), | ||
id: commit.id().into(), | ||
} | ||
.into()); | ||
} | ||
|
||
processor(&commit).map_err(EitherError::Processor)?; | ||
|
||
stats.max_generation = max(stats.max_generation, commit.generation()); | ||
stats.min_generation = min(stats.min_generation, commit.generation()); | ||
let parent_count = commit | ||
.iter_parents() | ||
.try_fold(0u32, |acc, pos| pos.map(|_| acc + 1)) | ||
.map_err(Error::Commit)?; | ||
*stats.parent_counts.entry(parent_count).or_insert(0) += 1; | ||
prev_id = commit.id(); | ||
} | ||
|
||
if stats.min_generation == GENERATION_NUMBER_INFINITY { | ||
stats.min_generation = 0; | ||
} | ||
|
||
Ok(stats) | ||
} | ||
|
||
pub fn verify_checksum(&self) -> Result<owned::Id, Error> { | ||
// TODO: Use/copy git_odb::hash::bytes_of_file. | ||
let data_len_without_trailer = self.data.len() - SHA1_SIZE; | ||
let mut hasher = git_features::hash::Sha1::default(); | ||
hasher.update(&self.data[..data_len_without_trailer]); | ||
let actual = owned::Id::new_sha1(hasher.digest()); | ||
|
||
let expected = self.checksum(); | ||
if actual.to_borrowed() == expected { | ||
Ok(actual) | ||
} else { | ||
Err(Error::Mismatch { | ||
actual, | ||
expected: expected.into(), | ||
}) | ||
} | ||
} | ||
} | ||
|
||
/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the | ||
/// expected hash. | ||
fn verify_split_chain_filename_hash(path: impl AsRef<Path>, expected: borrowed::Id<'_>) -> Result<(), Error> { | ||
let path = path.as_ref(); | ||
path.file_name() | ||
.and_then(|filename| filename.to_str()) | ||
.and_then(|filename| filename.strip_suffix(".graph")) | ||
.and_then(|stem| stem.strip_prefix("graph-")) | ||
.map_or(Ok(()), |hex| match owned::Id::from_40_bytes_in_hex(hex.as_bytes()) { | ||
Ok(actual) if actual.to_borrowed() == expected => Ok(()), | ||
_ => Err(Error::Filename(format!( | ||
"graph-{}.graph", | ||
expected.to_sha1_hex().as_bstr() | ||
))), | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.