Skip to content

Commit

Permalink
prepare for re-encoding each pack object
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jul 17, 2020
1 parent e851cbe commit afae684
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 13 deletions.
3 changes: 2 additions & 1 deletion git-odb/src/pack/bundle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ impl Bundle {
pub fn verify_checksums<P, C>(
&self,
thread_limit: Option<usize>,
mode: pack::index::verify::Mode,
progress: Option<P>,
make_cache: impl Fn() -> C + Send + Sync,
) -> Result<(owned::Id, Option<pack::index::verify::Outcome>), pack::index::verify::Error>
Expand All @@ -86,7 +87,7 @@ impl Bundle {
C: pack::cache::DecodeEntry,
{
self.index
.verify_checksum_of_index(Some(&self.pack), thread_limit, progress, make_cache)
.verify_checksum_of_index(Some(&self.pack), thread_limit, mode, progress, make_cache)
}
}

Expand Down
2 changes: 1 addition & 1 deletion git-odb/src/pack/data/decoded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub struct Entry {
pub header: Header,
/// The decompressed size of the object in bytes
pub decompressed_size: u64,
/// absolute offset to compressed object data in the pack
/// absolute offset to compressed object data in the pack, just behind the header
pub data_offset: u64,
}

Expand Down
19 changes: 15 additions & 4 deletions git-odb/src/pack/index/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ pub struct Outcome {
pub pack_size: u64,
}

/// Various ways in which a pack and index can be verified
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum Mode {
/// Validate SHA1 and CRC32
Sha1CRC32,
/// Validate SHA1 and CRC32, and decode each non-Blob object
Sha1CRC32Decode,
/// Validate SHA1 and CRC32, and decode and encode each non-Blob object
Sha1CRC32DecodeEncode,
}

/// Verify and validate the content of the index file
impl index::File {
pub fn checksum_of_index(&self) -> owned::Id {
Expand All @@ -94,6 +105,7 @@ impl index::File {
&self,
pack: Option<&pack::data::File>,
thread_limit: Option<usize>,
mode: Mode,
progress: Option<P>,
make_cache: impl Fn() -> C + Send + Sync,
) -> Result<(owned::Id, Option<Outcome>), Error>
Expand Down Expand Up @@ -281,10 +293,9 @@ impl index::File {
});
}
if let Some(desired_crc32) = index_entry.crc32 {
let actual_crc32 = pack.entry_crc32(
index_entry.pack_offset,
(pack_entry_data_offset - index_entry.pack_offset) as usize + consumed_input,
);
let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
let actual_crc32 =
pack.entry_crc32(index_entry.pack_offset, header_size + consumed_input);
if actual_crc32 != desired_crc32 {
return Err(Error::Crc32Mismatch {
actual: actual_crc32,
Expand Down
16 changes: 12 additions & 4 deletions git-odb/tests/pack/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,14 @@ fn pack_lookup() {
assert_eq!(pack.kind(), pack::data::Kind::V2);
assert_eq!(pack.num_objects(), idx.num_objects());
assert_eq!(
idx.verify_checksum_of_index(Some(&pack), None, Discard.into(), || DecodeEntryNoop)
.unwrap(),
idx.verify_checksum_of_index(
Some(&pack),
None,
index::verify::Mode::Sha1CRC32DecodeEncode,
Discard.into(),
|| DecodeEntryNoop
)
.unwrap(),
(idx.checksum_of_index(), Some(stats.to_owned()))
);
for idx_entry in idx.iter() {
Expand Down Expand Up @@ -199,8 +205,10 @@ fn iter() {
assert_eq!(idx.version(), *version);
assert_eq!(idx.num_objects(), *num_objects);
assert_eq!(
idx.verify_checksum_of_index(None, None, Discard.into(), || DecodeEntryNoop)
.unwrap(),
idx.verify_checksum_of_index(None, None, index::verify::Mode::Sha1CRC32Decode, Discard.into(), || {
DecodeEntryNoop
})
.unwrap(),
(idx.checksum_of_index(), None)
);
assert_eq!(idx.checksum_of_index(), hex_to_id(index_checksum));
Expand Down
5 changes: 4 additions & 1 deletion gitoxide-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ pub struct Context<W1: io::Write, W2: io::Write> {
/// Otherwise, usually use as many threads as there are logical cores.
/// A value of 0 is interpreted as no-limit
pub thread_limit: Option<usize>,
pub mode: index::verify::Mode,
}

impl Default for Context<Vec<u8>, Vec<u8>> {
fn default() -> Self {
Context {
output_statistics: None,
thread_limit: None,
mode: VerifyMode::Sha1CRC32,
out: Vec::new(),
err: Vec::new(),
}
Expand Down Expand Up @@ -95,6 +97,7 @@ pub fn verify_pack_or_pack_index<P, W1, W2>(
Context {
mut out,
mut err,
mode,
output_statistics,
thread_limit,
}: Context<W1, W2>,
Expand Down Expand Up @@ -132,7 +135,7 @@ where
Err(e)
})
.ok();
idx.verify_checksum_of_index(pack.as_ref(), thread_limit, progress, || -> EitherCache {
idx.verify_checksum_of_index(pack.as_ref(), thread_limit, mode, progress, || -> EitherCache {
if output_statistics.is_some() {
// turn off acceleration as we need to see entire chains all the time
EitherCache::Left(pack::cache::DecodeEntryNoop)
Expand Down
16 changes: 16 additions & 0 deletions src/plumbing/lean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ mod options {
/// print the program version.
pub version: bool,

#[argh(switch)]
/// Decode and parse tags, commits and trees to validate their correctness beyond hashing correctly.
///
/// Malformed objects should not usually occur, but could be injected on purpose or accident.
/// This will reduce overall performance.
pub decode: bool,

#[argh(switch)]
/// Decode and parse tags, commits and trees to validate their correctness, and re-encode them.
///
/// This flag is primarily to test the implementation of encoding, and requires decoding the object first.
/// Encoding an object after decoding it should yield exactly the same bytes.
/// This will reduce overall performance even more, as re-encoding requires transforming zero-copy objects into
/// owned objects, causing plenty of allocation to occur.
pub re_encode: bool,

#[argh(option, short = 't')]
/// the amount of threads to use for some operations.
///
Expand Down
4 changes: 2 additions & 2 deletions tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
* [ ] ~~support streamed objects (similar to how it's done with loose objects)~~ - no need, all slices support io::Read, and we don't
actually support streaming, so let's not unify 'interfaces' on a low level like this.
* **owned objects**
* [ ] encode object
* [x] encode object
* [x] blob
* [x] tag
* [x] tree
* [x] commit
* [ ] write loose
* **pack verify**
* add '--some-flag' to run every non-blob through a decode/encode cycle to see if all objects can be parsed
* add that to the stress test
* **plumbing - explode pack**
* [ ] write loose object
* [ ] single threaded
* [ ] multi-threaded
* [ ] progress
Expand Down
6 changes: 6 additions & 0 deletions tests/stateless-journey.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ title "CLI ${kind}"
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --statistics "$PACK_INDEX_FILE"
}
)
(with "decode"
it "verifies the pack index successfully and with desired output, and decodes all objects" && {
WITH_SNAPSHOT="$snapshot/plumbing-verify-pack-index-success" \
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --decode "$PACK_INDEX_FILE"
}
)
if test "$kind" = "max"; then
(with "statistics (JSON)"
it "verifies the pack index successfully and with desired output" && {
Expand Down

0 comments on commit afae684

Please sign in to comment.