Skip to content

Commit

Permalink
feat!: represent object cache configuration like `GITOXIDE_PACK_CACHE…
Browse files Browse the repository at this point in the history
…_MEMORY` in git-configuration.

That way there is a unified system for how to set values, which may be overridable by configuration
variables or not.

With this changes, the explicit application of environment variables for setting the cache
isn't required anymore as everything happens using git-configuration, and automatically,
while providing full control like before.
  • Loading branch information
Byron committed Nov 29, 2022
1 parent a4ac9cf commit becbd8d
Show file tree
Hide file tree
Showing 12 changed files with 151 additions and 86 deletions.
2 changes: 1 addition & 1 deletion git-repository/examples/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use git_repository as git;
use git_repository::Reference;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut repo = git::discover(".")?.apply_environment();
let mut repo = git::discover(".")?;
println!("Repo: {}", repo.work_dir().unwrap_or_else(|| repo.git_dir()).display());
let mut max_commit_size = 0;
let mut avg_commit_size = 0;
Expand Down
40 changes: 35 additions & 5 deletions git-repository/src/config/cache/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ impl Cache {
ssh_prefix: _,
http_transport,
identity,
gitoxide_prefix,
}: repository::permissions::Environment,
repository::permissions::Config {
git_binary: use_installation,
Expand Down Expand Up @@ -133,7 +134,7 @@ impl Cache {
source: git_config::Source::Api,
})?;
}
apply_environment_overrides(&mut globals, *git_prefix, http_transport, identity)?;
apply_environment_overrides(&mut globals, *git_prefix, http_transport, identity, gitoxide_prefix)?;
globals
};

Expand All @@ -144,12 +145,16 @@ impl Cache {
let ignore_case = config_bool(&config, "core.ignoreCase", false, lenient_config)?;
let use_multi_pack_index = config_bool(&config, "core.multiPackIndex", true, lenient_config)?;
let object_kind_hint = util::disambiguate_hint(&config);
let (pack_cache_bytes, object_cache_bytes) =
util::parse_object_caches(&config, lenient_config, filter_config_section)?;
// NOTE: When adding a new initial cache, consider adjusting `reread_values_and_clear_caches()` as well.
Ok(Cache {
resolved: config.into(),
use_multi_pack_index,
object_hash,
object_kind_hint,
pack_cache_bytes,
object_cache_bytes,
reflog,
is_bare,
ignore_case,
Expand Down Expand Up @@ -203,6 +208,8 @@ impl Cache {
self.personas = Default::default();
self.url_rewrite = Default::default();
self.diff_algorithm = Default::default();
(self.pack_cache_bytes, self.object_cache_bytes) =
util::parse_object_caches(config, self.lenient_config, self.filter_config_section)?;
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
{
self.url_scheme = Default::default();
Expand Down Expand Up @@ -244,6 +251,7 @@ fn apply_environment_overrides(
git_prefix: Permission,
http_transport: Permission,
identity: Permission,
gitoxide_prefix: Permission,
) -> Result<(), Error> {
fn var_as_bstring(var: &str, perm: Permission) -> Option<BString> {
perm.check_opt(var)
Expand Down Expand Up @@ -418,11 +426,33 @@ fn apply_environment_overrides(
.new_section("gitoxide", Some(Cow::Borrowed("objects".into())))
.expect("statically known valid section name");

for (var, key) in [
("GIT_NO_REPLACE_OBJECTS", "noReplace"),
("GIT_REPLACE_REF_BASE", "replaceRefBase"),
for (var, key, permission) in [
("GIT_NO_REPLACE_OBJECTS", "noReplace", git_prefix),
("GIT_REPLACE_REF_BASE", "replaceRefBase", git_prefix),
("GITOXIDE_OBJECT_CACHE_MEMORY", "cacheLimit", gitoxide_prefix),
] {
if let Some(value) = var_as_bstring(var, git_prefix) {
if let Some(value) = var_as_bstring(var, permission) {
section.push_with_comment(
key.try_into().expect("statically known to be valid"),
Some(value.as_ref()),
format!("from {var}").as_str(),
);
}
}

if section.num_values() == 0 {
let id = section.id();
env_override.remove_section_by_id(id);
}
}

{
let mut section = env_override
.new_section("core", None)
.expect("statically known valid section name");

for (var, key) in [("GITOXIDE_PACK_CACHE_MEMORY", "deltaBaseCacheLimit")] {
if let Some(value) = var_as_bstring(var, gitoxide_prefix) {
section.push_with_comment(
key.try_into().expect("statically known to be valid"),
Some(value.as_ref()),
Expand Down
25 changes: 25 additions & 0 deletions git-repository/src/config/cache/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,31 @@ pub(crate) fn reflog_or_default(
})
}

/// Return `(pack_cache_bytes, object_cache_bytes)` as parsed from git-config
pub(crate) fn parse_object_caches(
config: &git_config::File<'static>,
lenient: bool,
mut filter_config_section: fn(&git_config::file::Metadata) -> bool,
) -> Result<(Option<usize>, usize), Error> {
let key = "core.deltaBaseCacheLimit";
let pack_cache_bytes = config
.integer_filter_by_key(key, &mut filter_config_section)
.transpose()
.with_leniency(lenient)
.map_err(|err| Error::Value { source: err, key })?;
let key = "gitoxide.objects.cacheLimit";
let object_cache_bytes = config
.integer_filter_by_key(key, &mut filter_config_section)
.transpose()
.with_leniency(lenient)
.map_err(|err| Error::Value { source: err, key })?
.unwrap_or_default();
Ok((
pack_cache_bytes.and_then(|v| v.try_into().ok()),
object_cache_bytes.try_into().unwrap_or_default(),
))
}

pub(crate) fn parse_core_abbrev(
config: &git_config::File<'static>,
object_hash: git_hash::Kind,
Expand Down
5 changes: 5 additions & 0 deletions git-repository/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ pub(crate) struct Cache {
pub(crate) url_scheme: OnceCell<remote::url::SchemePermission>,
/// The algorithm to use when diffing blobs
pub(crate) diff_algorithm: OnceCell<git_diff::blob::Algorithm>,
/// The amount of bytes to use for a memory backed delta pack cache. If `Some(0)`, no cache is used, if `None`
/// a standard cache is used which costs near to nothing and always pays for itself.
pub(crate) pack_cache_bytes: Option<usize>,
/// The amount of bytes to use for caching whole objects, or 0 to turn it off entirely.
pub(crate) object_cache_bytes: usize,
/// The config section filter from the options used to initialize this instance. Keep these in sync!
filter_config_section: fn(&git_config::file::Metadata) -> bool,
/// The object kind to pick if a prefix is ambiguous.
Expand Down
3 changes: 0 additions & 3 deletions git-repository/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@
//! Use the `cache-efficiency-debug` cargo feature to learn how efficient the cache actually is - it's easy to end up with lowered
//! performance if the cache is not hit in 50% of the time.
//!
//! Environment variables can also be used for configuration if the application is calling
//! [`apply_environment()`][crate::Repository::apply_environment()].
//!
//! ### Shortcomings & Limitations
//!
//! - Only a single `crate::object` or derivatives can be held in memory at a time, _per `Easy*`_.
Expand Down
63 changes: 0 additions & 63 deletions git-repository/src/repository/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,67 +26,4 @@ impl crate::Repository {
self.object_cache_size(bytes)
}
}

/// Read well-known environment variables related to caches and apply them to this instance, but not to clones of it - each
/// needs their own configuration.
///
/// Note that environment configuration never fails due to invalid environment values, but it should be used with caution as it
/// could be used to cause high memory consumption.
///
/// Use the `GITOXIDE_DISABLE_PACK_CACHE` environment variable to turn off any pack cache, which can be beneficial when it's known that
/// the cache efficiency is low. Use `GITOXIDE_PACK_CACHE_MEMORY=512MB` to use up to 512MB of RAM for the pack delta base
/// cache. If none of these are set, the default cache is fast enough to nearly never cause a (marginal) slow-down while providing
/// some gains most of the time. Note that the value given is _per-thread_.
///
/// Use the `GITOXIDE_OBJECT_CACHE_MEMORY=16mb` to set the given amount of memory to store full objects, on a per-thread basis.
pub fn apply_environment(self) -> Self {
// We have no cache types available without this flag currently. Maybe this should change at some point.
#[cfg(not(feature = "max-performance-safe"))]
return self;
#[cfg(feature = "max-performance-safe")]
{
let pack_cache_disabled = std::env::var_os("GITOXIDE_DISABLE_PACK_CACHE").is_some();
let mut this = self;
if !pack_cache_disabled {
let bytes = parse_bytes_from_var("GITOXIDE_PACK_CACHE_MEMORY");
let new_pack_cache = move || -> Box<git_odb::cache::PackCache> {
match bytes {
Some(bytes) => Box::new(git_pack::cache::lru::MemoryCappedHashmap::new(bytes)),
None => Box::new(git_pack::cache::lru::StaticLinkedList::<64>::default()),
}
};
this.objects.set_pack_cache(new_pack_cache);
} else {
this.objects.unset_pack_cache();
}

if let Some(bytes) = parse_bytes_from_var("GITOXIDE_OBJECT_CACHE_MEMORY") {
this.objects
.set_object_cache(move || Box::new(git_pack::cache::object::MemoryCappedHashmap::new(bytes)));
}
this
}
}
}

#[cfg(feature = "max-performance-safe")]
fn parse_bytes_from_var(name: &str) -> Option<usize> {
std::env::var(name)
.ok()
.and_then(|v| {
byte_unit::Byte::from_str(&v)
.map_err(|err| log::warn!("Failed to parse {:?} into byte unit for pack cache: {}", v, err))
.ok()
})
.and_then(|unit| {
unit.get_bytes()
.try_into()
.map_err(|err| {
log::warn!(
"Parsed bytes value is not representable as usize. Defaulting to standard pack cache: {}",
err
)
})
.ok()
})
}
37 changes: 27 additions & 10 deletions git-repository/src/repository/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@ impl crate::Repository {
linked_worktree_options: crate::open::Options,
index: crate::worktree::IndexStorage,
) -> Self {
let objects = setup_objects(objects, &config);
crate::Repository {
bufs: RefCell::new(Vec::with_capacity(4)),
work_tree,
common_dir,
objects: {
#[cfg(feature = "max-performance-safe")]
{
objects.with_pack_cache(|| Box::new(git_pack::cache::lru::StaticLinkedList::<64>::default()))
}
#[cfg(not(feature = "max-performance-safe"))]
{
objects
}
},
objects,
refs,
config,
options: linked_worktree_options,
Expand All @@ -36,3 +28,28 @@ impl crate::Repository {
self.into()
}
}

#[cfg_attr(not(feature = "max-performance-safe"), allow(unused_variables, unused_mut))]
fn setup_objects(mut objects: crate::OdbHandle, config: &crate::config::Cache) -> crate::OdbHandle {
#[cfg(feature = "max-performance-safe")]
{
match config.pack_cache_bytes {
None => objects.set_pack_cache(|| Box::new(git_pack::cache::lru::StaticLinkedList::<64>::default())),
Some(0) => objects.unset_pack_cache(),
Some(bytes) => objects.set_pack_cache(move || -> Box<git_odb::cache::PackCache> {
Box::new(git_pack::cache::lru::MemoryCappedHashmap::new(bytes))
}),
};
if config.object_cache_bytes == 0 {
objects.unset_object_cache();
} else {
let bytes = config.object_cache_bytes;
objects.set_object_cache(move || Box::new(git_pack::cache::object::MemoryCappedHashmap::new(bytes)));
}
objects
}
#[cfg(not(feature = "max-performance-safe"))]
{
objects
}
}
4 changes: 4 additions & 0 deletions git-repository/src/repository/permissions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ pub struct Environment {
/// Note that identity related environment variables prefixed with `GIT_` are falling under the
/// `git_prefix` permission, like `GIT_AUTHOR_NAME`.
pub identity: git_sec::Permission,
/// Decide if `gitoxide` specific variables may be read, prefixed with `GITOXIDE_`.
pub gitoxide_prefix: git_sec::Permission,
}

impl Environment {
Expand All @@ -94,6 +96,7 @@ impl Environment {
ssh_prefix: git_sec::Permission::Allow,
http_transport: git_sec::Permission::Allow,
identity: git_sec::Permission::Allow,
gitoxide_prefix: git_sec::Permission::Allow,
}
}
}
Expand Down Expand Up @@ -140,6 +143,7 @@ impl Permissions {
git_prefix: deny,
http_transport: deny,
identity: deny,
gitoxide_prefix: deny,
}
},
}
Expand Down
Git LFS file not shown
12 changes: 12 additions & 0 deletions git-repository/tests/fixtures/make_config_repos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,15 @@ git init http-proxy-authenticated
followRedirects
EOF
)

git init object-caches
(cd object-caches
git config core.deltaBaseCacheLimit 128m
git config gitoxide.objects.cacheLimit 16m
)

git init disabled-object-caches
(cd disabled-object-caches
git config core.deltaBaseCacheLimit 0
git config gitoxide.objects.cacheLimit 0
)
30 changes: 29 additions & 1 deletion git-repository/tests/repository/open.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,29 @@ mod submodules {
}
}

mod object_caches {
use crate::util::named_subrepo_opts;
use git_repository as git;

#[test]
fn default_git_and_custom_caches() -> crate::Result {
let opts = git::open::Options::isolated();
let repo = named_subrepo_opts("make_config_repos.sh", "object-caches", opts)?;
assert!(repo.objects.has_object_cache());
assert!(repo.objects.has_pack_cache());
Ok(())
}

#[test]
fn disabled() -> crate::Result {
let opts = git::open::Options::isolated();
let repo = named_subrepo_opts("make_config_repos.sh", "disabled-object-caches", opts)?;
assert!(!repo.objects.has_object_cache());
assert!(!repo.objects.has_pack_cache());
Ok(())
}
}

mod with_overrides {
use crate::util::named_subrepo_opts;
use git_object::bstr::BStr;
Expand Down Expand Up @@ -80,7 +103,9 @@ mod with_overrides {
.set("GIT_AUTHOR_NAME", "author name")
.set("GIT_AUTHOR_EMAIL", "author email")
.set("GIT_AUTHOR_DATE", default_date)
.set("EMAIL", "user email");
.set("EMAIL", "user email")
.set("GITOXIDE_PACK_CACHE_MEMORY", "0")
.set("GITOXIDE_OBJECT_CACHE_MEMORY", "5m");
let mut opts = git::open::Options::isolated()
.config_overrides([
"http.userAgent=agent-from-api",
Expand All @@ -95,6 +120,7 @@ mod with_overrides {
opts.permissions.env.git_prefix = Permission::Allow;
opts.permissions.env.http_transport = Permission::Allow;
opts.permissions.env.identity = Permission::Allow;
opts.permissions.env.gitoxide_prefix = Permission::Allow;
let repo = named_subrepo_opts("make_config_repos.sh", "http-config", opts)?;
let config = repo.config_snapshot();
assert_eq!(
Expand Down Expand Up @@ -175,6 +201,8 @@ mod with_overrides {
("gitoxide.commit.authorDate", default_date),
("gitoxide.commit.committerDate", default_date),
("gitoxide.user.emailFallback", "user email"),
("core.deltaBaseCacheLimit", "0"),
("gitoxide.objects.cacheLimit", "5m"),
] {
assert_eq!(
config
Expand Down
Loading

0 comments on commit becbd8d

Please sign in to comment.