Skip to content

Commit

Permalink
Use a custom hasher for 'seen' objects hashset… (#67)
Browse files Browse the repository at this point in the history
…for about 10% of performance, speeding up these lookups just a little
bit.
  • Loading branch information
Byron committed Sep 22, 2021
1 parent faf6f81 commit 70179e2
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 8 deletions.
5 changes: 3 additions & 2 deletions git-pack/src/data/output/count/objects/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod util;

mod types;
pub use types::{Error, ObjectExpansion, Options, Outcome};

mod tree;

/// The return type used by [`objects()`].
Expand Down Expand Up @@ -68,7 +69,7 @@ where
iter: objects_ids,
size: chunk_size,
};
let seen_objs = dashmap::DashSet::<ObjectId>::new();
let seen_objs = dashmap::DashSet::<ObjectId, types::OidState>::default();
let progress = Arc::new(parking_lot::Mutex::new(progress));

parallel::in_parallel(
Expand Down Expand Up @@ -124,7 +125,7 @@ where
Oid: Into<ObjectId> + Send,
IterErr: std::error::Error + Send,
{
let seen_objs = RefCell::new(HashSet::<ObjectId>::new());
let seen_objs = RefCell::new(HashSet::<ObjectId, types::OidState>::default());

let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
expand::this(
Expand Down
9 changes: 5 additions & 4 deletions git-pack/src/data/output/count/objects/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ pub mod changes {
Visit,
};
use git_hash::ObjectId;
use git_object::bstr::BStr;
use git_object::{bstr::BStr, tree::EntryMode};

use crate::data::output::count::objects_impl::util::InsertImmutable;
use git_object::tree::EntryMode;

pub struct AllNew<'a, H> {
pub objects: Vec<ObjectId>,
Expand Down Expand Up @@ -61,11 +60,13 @@ pub mod changes {

pub mod traverse {
use git_hash::ObjectId;
use git_object::{bstr::BStr, tree::EntryRef};
use git_object::{
bstr::BStr,
tree::{EntryMode, EntryRef},
};
use git_traverse::tree::{visit::Action, Visit};

use crate::data::output::count::objects_impl::util::InsertImmutable;
use git_object::tree::EntryMode;

pub struct AllUnseen<'a, H> {
pub non_trees: Vec<ObjectId>,
Expand Down
31 changes: 31 additions & 0 deletions git-pack/src/data/output/count/objects/types.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
use std::{
convert::TryInto,
hash::{BuildHasher, Hasher},
};

/// Information gathered during the run of [`iter_from_objects()`][super::objects()].
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
Expand All @@ -14,6 +19,32 @@ pub struct Outcome {
pub total_objects: usize,
}

/// Zero-sized hasher factory used as the `S` parameter of the hash sets that
/// track already-seen object ids.
///
/// Its [`BuildHasher`] implementation hands out [`OidHasher`] instances.
#[derive(Clone, Default)]
pub struct OidState;

/// A minimal [`Hasher`] for keys that already are (or start with) a
/// well-distributed digest, such as git object ids.
///
/// Instead of mixing the input, it takes the first eight bytes of the most
/// recent `write()` call verbatim as the hash value.
#[derive(Default, Clone)]
pub struct OidHasher {
    /// The value returned by `finish()`; set by the latest call to `write()`.
    digest: u64,
}

impl Hasher for OidHasher {
    /// Returns the digest captured by the most recent `write()`, or `0` if
    /// nothing was written yet.
    fn finish(&self) -> u64 {
        self.digest
    }

    /// Replaces the digest with the first eight bytes of `bytes`, interpreted
    /// as a big-endian integer.
    ///
    /// Inputs shorter than eight bytes are zero-padded on the right rather
    /// than causing a panic. Such writes do happen: `Hash` implementations of
    /// slices and arrays emit a `usize` length prefix, which is only four
    /// bytes wide on 32-bit targets, and the set implementations using this
    /// hasher are generic over the key type.
    fn write(&mut self, bytes: &[u8]) {
        let prefix: [u8; 8] = match bytes.get(..8) {
            Some(head) => head.try_into().expect("a slice of length 8 always fits [u8; 8]"),
            None => {
                // Fewer than eight bytes available: keep them in the leading
                // (most significant) positions and fill the rest with zeros.
                let mut padded = [0u8; 8];
                padded[..bytes.len()].copy_from_slice(bytes);
                padded
            }
        };
        self.digest = u64::from_be_bytes(prefix);
    }
}

impl BuildHasher for OidState {
type Hasher = OidHasher;

fn build_hasher(&self) -> Self::Hasher {
OidHasher::default()
}
}

impl Outcome {
pub(in crate::data::output::count) fn aggregate(
&mut self,
Expand Down
5 changes: 3 additions & 2 deletions git-pack/src/data/output/count/objects/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ mod trait_impls {
use dashmap::DashSet;

use super::InsertImmutable;
use crate::data::output::count::objects_impl::types::OidState;

impl<T: Eq + Hash> InsertImmutable<T> for DashSet<T> {
impl<T: Eq + Hash> InsertImmutable<T> for DashSet<T, OidState> {
fn insert(&self, item: T) -> bool {
self.insert(item)
}
}

impl<T: Eq + Hash> InsertImmutable<T> for RefCell<HashSet<T>> {
impl<T: Eq + Hash> InsertImmutable<T> for RefCell<HashSet<T, OidState>> {
fn insert(&self, item: T) -> bool {
self.borrow_mut().insert(item)
}
Expand Down

0 comments on commit 70179e2

Please sign in to comment.