Skip to content

Commit

Permalink
Merge branch 'hours-upgrade'
Browse files — browse the repository at this point in the history
  • Loading branch information
Byron committed Sep 19, 2022
2 parents 429cccc + 4d0977d commit 26489d1
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 94 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions gitoxide-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ default = []
## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim).
organize = ["git-url", "jwalk"]
## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours).
estimate-hours = ["itertools", "rayon", "fs-err"]
estimate-hours = ["itertools", "fs-err"]

#! ### Mutually Exclusive Networking
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.
Expand Down Expand Up @@ -59,7 +59,6 @@ blocking = { version = "1.0.2", optional = true }
git-url = { version = "^0.8.0", path = "../git-url", optional = true }
jwalk = { version = "0.6.0", optional = true }

rayon = { version = "1.5.0", optional = true }
itertools = { version = "0.10.1", optional = true }
fs-err = { version = "2.6.0", optional = true }

Expand Down
185 changes: 100 additions & 85 deletions gitoxide-core/src/hours.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
use std::collections::BTreeSet;
use std::{
collections::{hash_map::Entry, HashMap},
ffi::OsStr,
io,
path::Path,
time::Instant,
};

use anyhow::{anyhow, bail};
use git_repository as git;
use git_repository::{
actor,
bstr::{BString, ByteSlice},
interrupt, objs,
prelude::*,
progress,
refs::file::ReferenceExt,
Progress,
};
use git_repository::bstr::{BStr, BString};
use git_repository::{actor, bstr::ByteSlice, interrupt, objs, prelude::*, progress, Progress};
use itertools::Itertools;
use rayon::prelude::*;

/// Additional configuration for the hours estimation functionality.
pub struct Context<W> {
Expand All @@ -41,7 +33,7 @@ pub struct Context<W> {
/// * _progress_ - A way to provide progress and performance information
pub fn estimate<W, P>(
working_dir: &Path,
refname: &OsStr,
rev_spec: &BStr,
mut progress: P,
Context {
show_pii,
Expand All @@ -55,69 +47,92 @@ where
P: Progress,
{
let repo = git::discover(working_dir)?.apply_environment();
let commit_id = repo
.refs
.find(refname.to_string_lossy().as_ref())?
.peel_to_id_in_place(&repo.refs, |oid, buf| {
repo.objects
.try_find(oid, buf)
.map(|obj| obj.map(|obj| (obj.kind, obj.data)))
})?
.to_owned();
let commit_id = repo.rev_parse_single(rev_spec)?.detach();
let mut string_heap = BTreeSet::<&'static [u8]>::new();

let (all_commits, is_shallow) = {
let start = Instant::now();
let mut progress = progress.add_child("Traverse commit graph");
progress.init(None, progress::count("commits"));
let mut commits: Vec<Vec<u8>> = Vec::new();
let commit_iter = interrupt::Iter::new(
commit_id.ancestors(|oid, buf| {
progress.inc();
repo.objects.find(oid, buf).map(|o| {
commits.push(o.data.to_owned());
objs::CommitRefIter::from_bytes(o.data)
})
}),
|| anyhow!("Cancelled by user"),
);
let mut is_shallow = false;
for c in commit_iter {
match c? {
Ok(c) => c,
Err(git::traverse::commit::ancestors::Error::FindExisting { .. }) => {
is_shallow = true;
break;
let string_heap = &mut string_heap;
std::thread::scope(
move |scope| -> anyhow::Result<(Vec<actor::SignatureRef<'static>>, bool)> {
let start = Instant::now();
progress.init(None, progress::count("commits"));
let (tx, rx) = std::sync::mpsc::channel::<Vec<u8>>();
let mailmap = repo.open_mailmap();

let handle = scope.spawn(move || -> anyhow::Result<Vec<actor::SignatureRef<'static>>> {
let mut out = Vec::new();
for commit_data in rx {
if let Some(author) = objs::CommitRefIter::from_bytes(&commit_data)
.author()
.map(|author| mailmap.resolve(author.trim()))
.ok()
{
let mut string_ref = |s: &BString| -> &'static BStr {
match string_heap.get(s.as_slice()) {
Some(n) => n.as_bstr(),
None => {
let sv: Vec<u8> = s.clone().into();
string_heap.insert(Box::leak(sv.into_boxed_slice()));
(*string_heap.get(s.as_slice()).expect("present")).as_ref()
}
}
};
let name = string_ref(&author.name);
let email = string_ref(&author.email);

out.push(actor::SignatureRef {
name,
email,
time: author.time,
});
}
}
out.shrink_to_fit();
out.sort_by(|a, b| {
a.email.cmp(&b.email).then(
a.time
.seconds_since_unix_epoch
.cmp(&b.time.seconds_since_unix_epoch)
.reverse(),
)
});
Ok(out)
});

let commit_iter = interrupt::Iter::new(
commit_id.ancestors(|oid, buf| {
progress.inc();
repo.objects.find(oid, buf).map(|o| {
tx.send(o.data.to_owned()).ok();
objs::CommitRefIter::from_bytes(o.data)
})
}),
|| anyhow!("Cancelled by user"),
);
let mut is_shallow = false;
for c in commit_iter {
match c? {
Ok(c) => c,
Err(git::traverse::commit::ancestors::Error::FindExisting { .. }) => {
is_shallow = true;
break;
}
Err(err) => return Err(err.into()),
};
}
Err(err) => return Err(err.into()),
};
}
progress.show_throughput(start);
(commits, is_shallow)
drop(tx);
progress.show_throughput(start);
Ok((handle.join().expect("no panic")?, is_shallow))
},
)?
};

let mailmap = repo.open_mailmap();
let start = Instant::now();
#[allow(clippy::redundant_closure)]
let mut all_commits: Vec<actor::Signature> = all_commits
.into_par_iter()
.filter_map(|commit_data: Vec<u8>| {
objs::CommitRefIter::from_bytes(&commit_data)
.author()
.map(|author| mailmap.resolve(author.trim()))
.ok()
})
.collect::<Vec<_>>();
all_commits.sort_by(|a, b| {
a.email.cmp(&b.email).then(
a.time
.seconds_since_unix_epoch
.cmp(&b.time.seconds_since_unix_epoch)
.reverse(),
)
});
if all_commits.is_empty() {
bail!("No commits to process");
}

let start = Instant::now();
let mut current_email = &all_commits[0].email;
let mut slice_start = 0;
let mut results_by_hours = Vec::new();
Expand Down Expand Up @@ -201,15 +216,15 @@ where
const MINUTES_PER_HOUR: f32 = 60.0;
const HOURS_PER_WORKDAY: f32 = 8.0;

fn estimate_hours(commits: &[actor::Signature]) -> WorkByEmail {
fn estimate_hours(commits: &[actor::SignatureRef<'static>]) -> WorkByEmail {
assert!(!commits.is_empty());
const MAX_COMMIT_DIFFERENCE_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR;
const FIRST_COMMIT_ADDITION_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR;

let hours = FIRST_COMMIT_ADDITION_IN_MINUTES / 60.0
+ commits.iter().rev().tuple_windows().fold(
0_f32,
|hours, (cur, next): (&actor::Signature, &actor::Signature)| {
|hours, (cur, next): (&actor::SignatureRef<'_>, &actor::SignatureRef<'_>)| {
let change_in_minutes =
(next.time.seconds_since_unix_epoch - cur.time.seconds_since_unix_epoch) as f32 / MINUTES_PER_HOUR;
if change_in_minutes < MAX_COMMIT_DIFFERENCE_IN_MINUTES {
Expand All @@ -221,19 +236,19 @@ fn estimate_hours(commits: &[actor::Signature]) -> WorkByEmail {
);
let author = &commits[0];
WorkByEmail {
name: author.name.to_owned(),
email: author.email.to_owned(),
name: author.name,
email: author.email,
hours,
num_commits: commits.len() as u32,
}
}

fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec<WorkByPerson<'_>> {
let mut email_to_index = HashMap::<&BString, usize>::with_capacity(persons.len());
let mut name_to_index = HashMap::<&BString, usize>::with_capacity(persons.len());
let mut out = Vec::<WorkByPerson<'_>>::with_capacity(persons.len());
fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec<WorkByPerson> {
let mut email_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len());
let mut name_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len());
let mut out = Vec::<WorkByPerson>::with_capacity(persons.len());
for person_by_email in persons {
match email_to_index.entry(&person_by_email.email) {
match email_to_index.entry(person_by_email.email) {
Entry::Occupied(email_entry) => {
out[*email_entry.get()].merge(person_by_email);
name_to_index.insert(&person_by_email.name, *email_entry.get());
Expand All @@ -256,14 +271,14 @@ fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec<WorkByPerson<'_>> {
}

#[derive(Debug)]
struct WorkByPerson<'a> {
name: Vec<&'a BString>,
email: Vec<&'a BString>,
struct WorkByPerson {
name: Vec<&'static BStr>,
email: Vec<&'static BStr>,
hours: f32,
num_commits: u32,
}

impl<'a> WorkByPerson<'a> {
impl<'a> WorkByPerson {
fn merge(&mut self, other: &'a WorkByEmail) {
if !self.name.contains(&&other.name) {
self.name.push(&other.name);
Expand All @@ -276,18 +291,18 @@ impl<'a> WorkByPerson<'a> {
}
}

impl<'a> From<&'a WorkByEmail> for WorkByPerson<'a> {
impl<'a> From<&'a WorkByEmail> for WorkByPerson {
fn from(w: &'a WorkByEmail) -> Self {
WorkByPerson {
name: vec![&w.name],
email: vec![&w.email],
name: vec![w.name],
email: vec![w.email],
hours: w.hours,
num_commits: w.num_commits,
}
}
}

impl<'a> WorkByPerson<'a> {
impl WorkByPerson {
fn write_to(&self, total_hours: f32, mut out: impl std::io::Write) -> std::io::Result<()> {
writeln!(
out,
Expand All @@ -308,8 +323,8 @@ impl<'a> WorkByPerson<'a> {

#[derive(Debug)]
struct WorkByEmail {
name: BString,
email: BString,
name: &'static BStr,
email: &'static BStr,
hours: f32,
num_commits: u32,
}
4 changes: 2 additions & 2 deletions src/porcelain/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub fn main() -> Result<()> {
Subcommands::Tool(tool) => match tool {
crate::porcelain::options::ToolCommands::EstimateHours(crate::porcelain::options::EstimateHours {
working_dir,
refname,
rev_spec,
no_bots,
show_pii,
omit_unify_identities,
Expand All @@ -53,7 +53,7 @@ pub fn main() -> Result<()> {
move |progress, out, _err| {
hours::estimate(
&working_dir,
&refname,
rev_spec.as_ref(),
progress,
hours::Context {
show_pii,
Expand Down
10 changes: 6 additions & 4 deletions src/porcelain/options.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::{ffi::OsString, path::PathBuf};
use git::bstr::BString;
use git_repository as git;
use std::path::PathBuf;

#[derive(Debug, clap::Parser)]
#[clap(about = "The rusty git", version = clap::crate_version!())]
Expand Down Expand Up @@ -93,9 +95,9 @@ pub struct EstimateHours {
#[clap(validator_os = validator::is_repo)]
#[clap(default_value = ".")]
pub working_dir: PathBuf,
/// The name of the ref like 'HEAD' or 'main' at which to start iterating the commit graph.
#[clap(default_value("HEAD"))]
pub refname: OsString,
/// The name of the revision as spec, like 'HEAD' or 'main' at which to start iterating the commit graph.
#[clap(default_value("HEAD"), parse(try_from_os_str = git::env::os_str_to_bstring))]
pub rev_spec: BString,
/// Ignore github bots which match the `[bot]` search string.
#[clap(short = 'b', long)]
pub no_bots: bool,
Expand Down

0 comments on commit 26489d1

Please sign in to comment.