Skip to content

Commit

Permalink
refactor: split data::output::count::objects into files (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Sep 21, 2021
1 parent b209da2 commit 8fe4612
Show file tree
Hide file tree
Showing 13 changed files with 773 additions and 759 deletions.
3 changes: 2 additions & 1 deletion cargo-smart-release/src/command/changelog/commit/history.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::command::changelog_impl::commit::Message;
use git_repository as git;

use crate::command::changelog_impl::commit::Message;

/// A head reference with all commits that are 'governed' by it, that is, commits in its exclusive ancestry.
pub struct Segment<'a> {
pub _head: git::refs::Reference,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use git_conventional::Type;
use git_repository as git;
use git_repository::bstr::{BStr, ByteSlice};

use crate::command::changelog_impl::commit::Message;
use git_conventional::Type;

#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq, Eq))]
Expand Down
3 changes: 2 additions & 1 deletion git-pack/src/cache/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ use crate::cache;
mod memory {
use std::num::NonZeroUsize;

use crate::cache;
use clru::WeightScale;

use crate::cache;

struct Entry {
data: Vec<u8>,
kind: git_object::Kind,
Expand Down
9 changes: 7 additions & 2 deletions git-pack/src/data/output/count/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ impl Count {
}
}

#[path = "objects/mod.rs"]
mod objects_impl;
pub use objects_impl::{objects, objects_unthreaded};

///
pub mod objects {
    pub use super::objects_impl::{Error, ObjectExpansion, Options, Outcome, Result};
}
750 changes: 0 additions & 750 deletions git-pack/src/data/output/count/objects.rs

This file was deleted.

431 changes: 431 additions & 0 deletions git-pack/src/data/output/count/objects/mod.rs

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions git-pack/src/data/output/count/objects/reduce.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use std::{marker::PhantomData, sync::Arc};

use git_features::{parallel, progress::Progress};

use super::Outcome;
use crate::data::output;

/// Aggregates the results of parallel object-counting chunks into one running total.
pub struct Statistics<E, P> {
    /// Running sum of all per-chunk outcomes fed so far.
    total: Outcome,
    /// All counts collected across chunks, in the order they were fed.
    counts: Vec<output::Count>,
    /// Shared progress reporter; locked briefly whenever a chunk arrives.
    progress: Arc<parking_lot::Mutex<P>>,
    /// Marker tying the producer error type `E` to this reducer without storing a value of it.
    _err: PhantomData<E>,
}

impl<E, P> Statistics<E, P>
where
    P: Progress,
{
    /// Create a new reducer which reports the amount of counted objects through `progress`.
    pub fn new(progress: Arc<parking_lot::Mutex<P>>) -> Self {
        Statistics {
            total: Default::default(),
            counts: Default::default(),
            progress,
            // `PhantomData` is a unit struct — construct it directly instead of
            // `PhantomData::default()` (clippy: default_constructed_unit_structs).
            _err: PhantomData,
        }
    }
}

impl<E, P> parallel::Reduce for Statistics<E, P>
where
    P: Progress,
{
    /// Each producer sends the counts of one chunk along with its statistics, or an error.
    type Input = Result<(Vec<output::Count>, Outcome), E>;
    /// Nothing is produced per feed; everything is returned at finalization.
    type FeedProduce = ();
    /// All collected counts plus the aggregated outcome.
    type Output = (Vec<output::Count>, Outcome);
    /// Producer errors are forwarded unchanged.
    type Error = E;

    fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
        // A failed chunk aborts the reduction with the producer's error.
        let (counts, mut stats) = item?;
        // NOTE(review): the chunk's total is overwritten with its count length here —
        // presumably producers leave `total_objects` unset; confirm against the producer side.
        stats.total_objects = counts.len();
        self.total.aggregate(stats);
        self.progress.lock().inc_by(counts.len());
        self.counts.extend(counts);
        Ok(())
    }

    fn finalize(self) -> Result<Self::Output, Self::Error> {
        Ok((self.counts, self.total))
    }
}
114 changes: 114 additions & 0 deletions git-pack/src/data/output/count/objects/tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/// A tree-diff visitor that records ids of objects not encountered before.
pub mod changes {
    use git_diff::tree::{
        visit::{Action, Change},
        Visit,
    };
    use git_hash::ObjectId;
    use git_object::bstr::BStr;

    use crate::data::output::count::objects_impl::util::InsertImmutable;

    /// Collects the ids of added or modified objects seen for the first time.
    pub struct AllNew<'a, H> {
        /// Ids of every newly encountered object, in visit order.
        pub objects: Vec<ObjectId>,
        all_seen: &'a H,
    }

    impl<'a, H> AllNew<'a, H>
    where
        H: InsertImmutable<ObjectId>,
    {
        /// Create a visitor which de-duplicates ids against the shared `all_seen` set.
        pub fn new(all_seen: &'a H) -> Self {
            AllNew {
                objects: Vec::new(),
                all_seen,
            }
        }

        /// Drop all collected ids so the visitor can be reused.
        pub fn clear(&mut self) {
            self.objects.clear();
        }
    }

    impl<'a, H> Visit for AllNew<'a, H>
    where
        H: InsertImmutable<ObjectId>,
    {
        fn pop_front_tracked_path_and_set_current(&mut self) {}

        fn push_back_tracked_path_component(&mut self, _component: &BStr) {}

        fn push_path_component(&mut self, _component: &BStr) {}

        fn pop_path_component(&mut self) {}

        fn visit(&mut self, change: Change) -> Action {
            // Deletions carry nothing to pack; additions and modifications are
            // recorded only if their id was inserted into the shared set just now.
            if let Change::Addition { oid, .. } | Change::Modification { oid, .. } = change {
                if self.all_seen.insert(oid) {
                    self.objects.push(oid);
                }
            }
            Action::Continue
        }
    }
}

/// A tree-traversal visitor that gathers unseen non-tree objects and prunes
/// subtrees that were already visited.
pub mod traverse {
    use git_hash::ObjectId;
    use git_object::{bstr::BStr, tree::EntryRef};
    use git_traverse::tree::{visit::Action, Visit};

    use crate::data::output::count::objects_impl::util::InsertImmutable;

    /// Collects the ids of all non-tree entries seen for the first time.
    pub struct AllUnseen<'a, H> {
        /// Ids of every unseen non-tree entry, in traversal order.
        pub non_trees: Vec<ObjectId>,
        all_seen: &'a H,
    }

    impl<'a, H> AllUnseen<'a, H>
    where
        H: InsertImmutable<ObjectId>,
    {
        /// Create a visitor which de-duplicates ids against the shared `all_seen` set.
        pub fn new(all_seen: &'a H) -> Self {
            AllUnseen {
                non_trees: Vec::new(),
                all_seen,
            }
        }

        /// Drop all collected ids so the visitor can be reused.
        pub fn clear(&mut self) {
            self.non_trees.clear();
        }
    }

    impl<'a, H> Visit for AllUnseen<'a, H>
    where
        H: InsertImmutable<ObjectId>,
    {
        fn pop_front_tracked_path_and_set_current(&mut self) {}

        fn push_back_tracked_path_component(&mut self, _component: &BStr) {}

        fn push_path_component(&mut self, _component: &BStr) {}

        fn pop_path_component(&mut self) {}

        fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
            // Only descend into trees that were not encountered before.
            let first_encounter = self.all_seen.insert(entry.oid.to_owned());
            if first_encounter {
                Action::Continue
            } else {
                Action::Skip
            }
        }

        fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
            // Record the entry only on first sight.
            let first_encounter = self.all_seen.insert(entry.oid.to_owned());
            if first_encounter {
                self.non_trees.push(entry.oid.to_owned());
            }
            Action::Continue
        }
    }
}
115 changes: 115 additions & 0 deletions git-pack/src/data/output/count/objects/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/// Information gathered during the run of [`objects()`][super::objects()].
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The amount of objects provided to start the iteration.
    pub input_objects: usize,
    /// The amount of objects that have been expanded from the input source.
    /// It's desirable to do that as expansion happens on multiple threads, allowing the amount of input objects to be small.
    /// `expanded_objects - decoded_objects` is the 'cheap' object we found without decoding the object itself.
    pub expanded_objects: usize,
    /// The amount of fully decoded objects. These are the most expensive as they are fully decoded.
    pub decoded_objects: usize,
    /// The total amount of encountered objects. Should be `expanded_objects + input_objects`.
    pub total_objects: usize,
}

impl Outcome {
    /// Add all counters of `rhs` onto our own, consuming it.
    pub(in crate::data::output::count) fn aggregate(&mut self, rhs: Self) {
        self.input_objects += rhs.input_objects;
        self.decoded_objects += rhs.decoded_objects;
        self.expanded_objects += rhs.expanded_objects;
        self.total_objects += rhs.total_objects;
    }
}

/// The way input objects are handled
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum ObjectExpansion {
    /// Turn input objects into pack entries without any lookup or expansion.
    AsIs,
    /// For a Commit input, also obtain its tree and turn it into a pack entry together with
    /// everything reachable from it, that is nested trees and any other referenced objects.
    /// Any other input behaves like [`AsIs`][ObjectExpansion::AsIs].
    ///
    /// This mode is useful if all reachable objects should be added, as in cloning a repository.
    TreeContents,
    /// For a commit input, obtain its ancestors and turn them into pack entries, along with the
    /// ancestor trees and the commit's own tree. Then add only the objects found added or changed
    /// when comparing the ancestor trees with the current tree.
    /// Any other input behaves like [`AsIs`][ObjectExpansion::AsIs].
    ///
    /// This mode is useful to build a pack containing only new objects compared to a previous state.
    TreeAdditionsComparedToAncestor,
}

impl Default for ObjectExpansion {
    fn default() -> Self {
        Self::AsIs
    }
}

/// Configuration options for the pack generation functions provided in [this module][crate::data::output].
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Options {
    /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
    /// If more than one thread is used, the order of returned [counts][crate::data::output::Count] is not deterministic anymore
    /// especially when tree traversal is involved. Thus deterministic ordering requires `Some(1)` to be set.
    pub thread_limit: Option<usize>,
    /// The amount of objects per chunk or unit of work to be sent to threads for processing.
    pub chunk_size: usize,
    /// The way input objects are handled.
    pub input_object_expansion: ObjectExpansion,
    /// The size of a per-thread object cache in bytes to accelerate tree diffs in conjunction
    /// with [ObjectExpansion::TreeAdditionsComparedToAncestor].
    ///
    /// If zero, the cache is disabled but in a costly way. Consider using a low value instead.
    ///
    /// Defaults to 10 megabytes which usually leads to 2.5x speedups.
    #[cfg(feature = "object-cache-dynamic")]
    pub object_cache_size_in_bytes: usize,
}

impl Default for Options {
fn default() -> Self {
Options {
thread_limit: None,
chunk_size: 10,
input_object_expansion: Default::default(),
#[cfg(feature = "object-cache-dynamic")]
object_cache_size_in_bytes: 10 * 1024 * 1024,
}
}
}

/// The error returned while counting objects for pack generation.
// NOTE(review): the original doc referenced [bytes::FromEntriesIter][crate::data::output::bytes::FromEntriesIter],
// which lives in a different module — presumably a copy-paste leftover; confirm the intended reference.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error<FindErr, IterErr>
where
    FindErr: std::error::Error + 'static,
    IterErr: std::error::Error + 'static,
{
    // No `#[from]` on this variant — no automatic conversion is derived, so it
    // must be constructed explicitly at the call site.
    #[error(transparent)]
    CommitDecode(git_object::decode::Error),
    #[error(transparent)]
    FindExisting(#[from] FindErr),
    #[error(transparent)]
    InputIteration(IterErr),
    #[error(transparent)]
    TreeTraverse(git_traverse::tree::breadthfirst::Error),
    #[error(transparent)]
    TreeChanges(git_diff::tree::changes::Error),
    #[error("Operation interrupted")]
    Interrupted,
}
48 changes: 48 additions & 0 deletions git-pack/src/data/output/count/objects/util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/// A set-like container which can insert items through a shared reference (`&self`),
/// allowing concurrent or interiorly-mutable de-duplication.
pub trait InsertImmutable<Item: Eq + std::hash::Hash> {
    /// Insert `item`, returning `true` if it was not present before.
    fn insert(&self, item: Item) -> bool;
}

mod trait_impls {
    use std::{cell::RefCell, collections::HashSet, hash::Hash};

    use dashmap::DashSet;

    use super::InsertImmutable;

    impl<T: Eq + Hash> InsertImmutable<T> for RefCell<HashSet<T>> {
        fn insert(&self, value: T) -> bool {
            self.borrow_mut().insert(value)
        }
    }

    impl<T: Eq + Hash> InsertImmutable<T> for DashSet<T> {
        fn insert(&self, value: T) -> bool {
            // Resolves to the inherent `DashSet::insert`, which also takes `&self`,
            // so this is delegation rather than recursion.
            self.insert(value)
        }
    }
}

/// An adapter that yields the items of `iter` grouped into `Vec`s of at most `size` items.
///
/// The final chunk may be shorter when the underlying iterator runs out of items.
pub struct Chunks<I> {
    /// The maximum amount of items per produced chunk.
    pub size: usize,
    /// The iterator whose items are grouped into chunks.
    pub iter: I,
}

impl<I, Item> Iterator for Chunks<I>
where
    I: Iterator<Item = Item>,
{
    type Item = Vec<Item>;

    fn next(&mut self) -> Option<Self::Item> {
        // Guard against `size == 0`: the previous countdown (`items_left -= 1` before
        // the zero check) underflowed `usize`, panicking in debug builds and producing
        // one gigantic chunk in release builds. A zero-sized chunking yields nothing.
        if self.size == 0 {
            return None;
        }
        let mut chunk = Vec::with_capacity(self.size);
        while chunk.len() < self.size {
            match self.iter.next() {
                Some(item) => chunk.push(item),
                None => break,
            }
        }
        // An empty chunk means the source is exhausted.
        (!chunk.is_empty()).then(|| chunk)
    }
}
2 changes: 1 addition & 1 deletion git-repository/src/easy/ext/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{convert::TryInto, ops::DerefMut};

use git_hash::ObjectId;
use git_odb::{Find, FindExt};
use git_pack::cache::Object;
use git_ref::{
transaction::{LogChange, PreviousValue, RefLog},
FullName,
Expand All @@ -12,7 +13,6 @@ use crate::{
easy::{commit, object, ObjectRef, Oid},
ext::ObjectIdExt,
};
use git_pack::cache::Object;

/// Methods related to object creation.
pub trait ObjectAccessExt: easy::Access + Sized {
Expand Down
Loading

0 comments on commit 8fe4612

Please sign in to comment.