Skip to content

Commit

Permalink
support for loose object statistics in odb store (#287)
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jan 2, 2022
1 parent 3dfec81 commit 53d835a
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 21 deletions.
89 changes: 76 additions & 13 deletions git-odb/src/store_impls/dynamic/verify.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use crate::pack;
use crate::store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics};
use crate::types::IndexAndPacks;
use git_features::progress::Progress;
use std::ops::Deref;
use std::sync::atomic::{AtomicBool, Ordering};

#[allow(missing_docs, unused)]

///
pub mod integrity {
use crate::pack;
use std::path::PathBuf;

/// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
#[derive(Debug, thiserror::Error)]
Expand All @@ -21,6 +21,8 @@ pub mod integrity {
#[error(transparent)]
IndexOpen(#[from] pack::index::init::Error),
#[error(transparent)]
LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error),
#[error(transparent)]
MultiIndexOpen(#[from] pack::multi_index::init::Error),
#[error(transparent)]
PackOpen(#[from] pack::data::init::Error),
Expand All @@ -30,10 +32,41 @@ pub mod integrity {
NeedsRetryDueToChangeOnDisk,
}

#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
/// Integrity information about a single loose object database: where it is located and
/// the statistics that verifying it produced.
pub struct LooseObjectStatistics {
/// The path to the root directory of the loose object database.
pub path: PathBuf,
/// The statistics created after verifying the loose object database.
pub statistics: crate::loose::verify::integrity::Statistics,
}

#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
/// Traversal statistics of packs governed by single indices or multi-pack indices.
#[allow(missing_docs)]
pub enum SingleOrMultiStatistics {
/// The traversal statistics of the one pack that belongs to a single index.
Single(pack::index::traverse::Statistics),
/// The traversal statistics of each pack referenced by a multi-pack index, one entry per pack.
///
/// Each entry pairs a path with that pack's statistics. When produced by the store's integrity
/// check, the path is the directory containing the multi-pack index joined with the index name.
Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>),
}

/// Statistics gathered when traversing packs of various kinds of indices.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct IndexStatistics {
/// The path to the index or multi-pack index for which statistics were gathered.
// NOTE(review): the multi-index branch of the store's `verify_integrity()` appears to fill this
// field with `Default::default()` (an empty path) instead of the multi-index path — confirm
// whether that is intended.
pub path: PathBuf,
/// The actual statistics for the index at `path`.
pub statistics: SingleOrMultiStatistics,
}

/// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
pub struct Outcome<P> {
/// Pack traversal statistics for each pack whose objects were checked.
pub pack_traverse_statistics: Vec<pack::index::traverse::Statistics>,
/// Statistics for validated loose object stores.
pub loose_object_stores: Vec<LooseObjectStatistics>,
/// Pack traversal statistics for each index and their pack(s)
pub index_statistics: Vec<IndexStatistics>,
/// The provided progress instance.
pub progress: P,
}
Expand Down Expand Up @@ -104,11 +137,14 @@ impl super::Store {
progress.add_child("Checking integrity"),
should_interrupt,
)?;
statistics.push(
outcome
.pack_traverse_statistics
.expect("pack provided so there are stats"),
);
statistics.push(IndexStatistics {
path: bundle.index.path().to_owned(),
statistics: SingleOrMultiStatistics::Single(
outcome
.pack_traverse_statistics
.expect("pack provided so there are stats"),
),
});
}
IndexAndPacks::MultiIndex(bundle) => {
let index;
Expand All @@ -124,18 +160,45 @@ impl super::Store {
should_interrupt,
options.clone(),
)?;
statistics.extend(outcome.pack_traverse_statistics);

let index_dir = bundle.multi_index.path().parent().expect("file in a directory");
statistics.push(IndexStatistics {
path: Default::default(),
statistics: SingleOrMultiStatistics::Multi(
outcome
.pack_traverse_statistics
.into_iter()
.zip(index.index_names())
.map(|(statistics, index_name)| (index_dir.join(index_name), statistics))
.collect(),
),
});
}
}
progress.inc();
}

for _loose_db in &*index.loose_dbs {
// TODO: impl verify integrity for loose object databases
progress.init(
Some(index.loose_dbs.len()),
git_features::progress::count("loose object stores"),
);
let mut loose_object_stores = Vec::new();
for loose_db in &*index.loose_dbs {
let out = loose_db
.verify_integrity(
progress.add_child(loose_db.path().display().to_string()),
should_interrupt,
)
.map(|statistics| integrity::LooseObjectStatistics {
path: loose_db.path().to_owned(),
statistics,
})?;
loose_object_stores.push(out);
}

Ok(integrity::Outcome {
pack_traverse_statistics: statistics,
loose_object_stores,
index_statistics: statistics,
progress,
})
}
Expand Down
26 changes: 22 additions & 4 deletions git-odb/src/store_impls/loose/verify.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::loose::Store;
use crate::Write;
use git_features::progress::Progress;
use std::sync::atomic::{AtomicBool, Ordering};

///
pub mod integrity {
Expand All @@ -21,22 +23,32 @@ pub mod integrity {
},
#[error("Objects were deleted during iteration - try again")]
Retry,
#[error("Interrupted")]
Interrupted,
}

/// The outcome returned by [`verify_integrity()`][super::Store::verify_integrity()].
pub struct Outcome {
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Statistics {
/// The amount of loose objects we checked.
pub num_objects: usize,
}
}

impl Store {
/// Check the integrity of all loose objects by verifying that their hash matches the actual data and by decoding them fully.
pub fn verify_integrity(&self) -> Result<integrity::Outcome, integrity::Error> {
pub fn verify_integrity(
&self,
mut progress: impl Progress,
should_interrupt: &AtomicBool,
) -> Result<integrity::Statistics, integrity::Error> {
let mut buf = Vec::new();
let mut num_objects = 0;
let sink = crate::sink(self.object_hash);

let mut num_objects = 0;
let mut progress = progress.add_child("validating");
progress.init(None, git_features::progress::count("objects"));
for id in self.iter().filter_map(Result::ok) {
let object = self
.try_find(id, &mut buf)
Expand All @@ -55,8 +67,14 @@ impl Store {
kind: object.kind,
id,
})?;

progress.inc();
num_objects += 1;
if should_interrupt.load(Ordering::SeqCst) {
return Err(integrity::Error::Interrupted);
}
}
Ok(integrity::Outcome { num_objects })

Ok(integrity::Statistics { num_objects })
}
}
22 changes: 19 additions & 3 deletions git-odb/tests/odb/store/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ fn auto_refresh_with_and_without_id_stability() -> crate::Result {
mod verify {
use crate::store::dynamic::db;
use git_features::progress;
use git_testtools::fixture_path;
use std::sync::atomic::AtomicBool;

#[test]
Expand All @@ -580,10 +581,25 @@ mod verify {
.store_ref()
.verify_integrity(progress::Discard, &AtomicBool::new(false), Default::default())
.unwrap();
assert_eq!(outcome.index_statistics.len(), 3, "there are only three packs to check");
assert_eq!(
outcome.pack_traverse_statistics.len(),
3,
"there are only three packs to check"
outcome.index_statistics[0].path,
fixture_path("objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx")
);
assert_eq!(
outcome.index_statistics[1].path,
fixture_path("objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx")
);
assert_eq!(
outcome.index_statistics[2].path,
fixture_path("objects/pack/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx")
);
assert_eq!(
outcome.loose_object_stores,
vec![git_odb::store::verify::integrity::LooseObjectStatistics {
path: fixture_path("objects"),
statistics: git_odb::loose::verify::integrity::Statistics { num_objects: 7 }
}]
);

assert_eq!(
Expand Down
4 changes: 3 additions & 1 deletion git-odb/tests/odb/store/loose.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use git_actor::{Sign, Time};
use git_object::bstr::ByteSlice;
use std::sync::atomic::AtomicBool;

use git_features::progress;
use git_odb::loose::Store;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -35,7 +37,7 @@ pub fn locate_oid(id: git_hash::ObjectId, buf: &mut Vec<u8>) -> git_object::Data
#[test]
fn verify_integrity() {
let db = ldb();
let outcome = db.verify_integrity().unwrap();
let outcome = db.verify_integrity(progress::Discard, &AtomicBool::new(false)).unwrap();
assert_eq!(outcome.num_objects, 7);
}

Expand Down

0 comments on commit 53d835a

Please sign in to comment.