-
Notifications
You must be signed in to change notification settings - Fork 196
fix: size calculation for zstd frame cache #5859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
141f578
2cdded9
6235c17
fec0565
af64c74
297605a
ce81151
56bdcf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,3 +117,4 @@ V0 | |
| V1 | ||
| VPS | ||
| WIP | ||
| zstd | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,13 +7,22 @@ pub mod plain; | |
|
|
||
| pub use any::AnyCar; | ||
| pub use forest::ForestCar; | ||
| use get_size2::GetSize as _; | ||
| pub use many::ManyCar; | ||
| pub use plain::PlainCar; | ||
|
|
||
| use ahash::HashMap; | ||
| use cid::Cid; | ||
| use lru::LruCache; | ||
| use positioned_io::{ReadAt, Size}; | ||
| use std::{ | ||
| num::NonZeroUsize, | ||
| sync::{ | ||
| LazyLock, | ||
| atomic::{AtomicUsize, Ordering}, | ||
| }, | ||
| }; | ||
|
|
||
| use crate::utils::{cache::SizeTrackingLruCache, get_size::CidWrapper}; | ||
|
|
||
| pub trait RandomAccessFileReader: ReadAt + Size + Send + Sync + 'static {} | ||
| impl<X: ReadAt + Size + Send + Sync + 'static> RandomAccessFileReader for X {} | ||
|
|
@@ -24,52 +33,81 @@ pub type CacheKey = u64; | |
|
|
||
| type FrameOffset = u64; | ||
|
|
||
| pub static ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE: LazyLock<usize> = LazyLock::new(|| { | ||
| const ENV_KEY: &str = "FOREST_ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE"; | ||
| if let Ok(value) = std::env::var(ENV_KEY) { | ||
| if let Ok(size) = value.parse::<NonZeroUsize>() { | ||
| let size = size.get(); | ||
| tracing::info!("zstd frame max size is set to {size} via {ENV_KEY}"); | ||
| return size; | ||
| } else { | ||
| tracing::warn!("Failed to parse {ENV_KEY}={value}, value should be a positive integer"); | ||
| } | ||
| } | ||
| // 1GiB | ||
| 1024 * 1024 * 1024 | ||
| }); | ||
|
|
||
| pub struct ZstdFrameCache { | ||
| /// Maximum size in bytes. Pages will be evicted if the total size of the | ||
| /// cache exceeds this amount. | ||
| pub max_size: usize, | ||
| current_size: usize, | ||
| lru: LruCache<(FrameOffset, CacheKey), HashMap<Cid, Vec<u8>>>, | ||
| current_size: AtomicUsize, | ||
| lru: SizeTrackingLruCache<(FrameOffset, CacheKey), HashMap<CidWrapper, Vec<u8>>>, | ||
| } | ||
|
|
||
| impl Default for ZstdFrameCache { | ||
| fn default() -> Self { | ||
| ZstdFrameCache::new(ZstdFrameCache::DEFAULT_SIZE) | ||
| ZstdFrameCache::new(*ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE) | ||
| } | ||
| } | ||
|
|
||
| impl ZstdFrameCache { | ||
| // 1 GiB | ||
| pub const DEFAULT_SIZE: usize = 1024 * 1024 * 1024; | ||
|
|
||
| pub fn new(max_size: usize) -> Self { | ||
| ZstdFrameCache { | ||
| max_size, | ||
| current_size: 0, | ||
| lru: LruCache::unbounded(), | ||
| current_size: AtomicUsize::new(0), | ||
| lru: SizeTrackingLruCache::unbounded_with_default_metrics_registry( | ||
| "zstd_frame_cache".into(), | ||
| ), | ||
| } | ||
| } | ||
|
|
||
| /// Return a clone of the value associated with `cid`. If a value is found, | ||
| /// the cache entry is moved to the top of the queue. | ||
| pub fn get(&mut self, offset: FrameOffset, key: CacheKey, cid: Cid) -> Option<Option<Vec<u8>>> { | ||
| pub fn get(&self, offset: FrameOffset, key: CacheKey, cid: Cid) -> Option<Option<Vec<u8>>> { | ||
| self.lru | ||
| .cache() | ||
| .write() | ||
| .get(&(offset, key)) | ||
| .map(|index| index.get(&cid).cloned()) | ||
| .map(|index| index.get(&cid.into()).cloned()) | ||
| } | ||
|
|
||
| /// Insert entry into lru-cache and evict pages if `max_size` has been exceeded. | ||
| pub fn put(&mut self, offset: FrameOffset, key: CacheKey, index: HashMap<Cid, Vec<u8>>) { | ||
| fn size_of_entry(entry: &HashMap<Cid, Vec<u8>>) -> usize { | ||
| entry.values().map(Vec::len).sum::<usize>() | ||
| pub fn put(&self, offset: FrameOffset, key: CacheKey, index: HashMap<CidWrapper, Vec<u8>>) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd love it if we could have some coverage here, both for the logic and for the correctness of the size calculation.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| let lru_key = (offset, key); | ||
| let lru_key_size = lru_key.get_size(); | ||
| let entry_size = index.get_size(); | ||
| // Skip large items | ||
| if entry_size.saturating_add(lru_key_size) >= self.max_size { | ||
| return; | ||
| } | ||
| self.current_size += size_of_entry(&index); | ||
| if let Some(prev_entry) = self.lru.put((offset, key), index) { | ||
| self.current_size -= size_of_entry(&prev_entry); | ||
|
|
||
| if let Some((_, prev_entry)) = self.lru.push(lru_key, index) { | ||
| // keys are cancelled out | ||
| self.current_size.fetch_add(entry_size, Ordering::Relaxed); | ||
| self.current_size | ||
| .fetch_sub(prev_entry.get_size(), Ordering::Relaxed); | ||
| } else { | ||
| self.current_size | ||
| .fetch_add(entry_size.saturating_add(lru_key_size), Ordering::Relaxed); | ||
| } | ||
| while self.current_size > self.max_size { | ||
| if let Some((_, entry)) = self.lru.pop_lru() { | ||
| self.current_size -= size_of_entry(&entry) | ||
| while self.current_size.load(Ordering::Relaxed) > self.max_size { | ||
| if let Some((prev_key, prev_entry)) = self.lru.pop_lru() { | ||
| self.current_size.fetch_sub( | ||
| prev_key.get_size().saturating_add(prev_entry.get_size()), | ||
| Ordering::Relaxed, | ||
| ); | ||
| } else { | ||
| break; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,19 +59,27 @@ where | |
| registry.register_collector(Box::new(self.clone())); | ||
| } | ||
|
|
||
| pub fn new_without_metrics_registry( | ||
| cache_name: Cow<'static, str>, | ||
| capacity: NonZeroUsize, | ||
| ) -> Self { | ||
| fn new_inner(cache_name: Cow<'static, str>, capacity: Option<NonZeroUsize>) -> Self { | ||
| static ID_GENERATOR: AtomicUsize = AtomicUsize::new(0); | ||
|
|
||
| Self { | ||
| cache_id: ID_GENERATOR.fetch_add(1, Ordering::Relaxed), | ||
| cache_name, | ||
| cache: Arc::new(RwLock::new(LruCache::new(capacity))), | ||
| cache: Arc::new(RwLock::new( | ||
| capacity | ||
| .map(LruCache::new) | ||
| .unwrap_or_else(LruCache::unbounded), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we ban the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| )), | ||
| } | ||
| } | ||
|
|
||
| pub fn new_without_metrics_registry( | ||
| cache_name: Cow<'static, str>, | ||
| capacity: NonZeroUsize, | ||
| ) -> Self { | ||
| Self::new_inner(cache_name, Some(capacity)) | ||
| } | ||
|
|
||
| pub fn new_with_metrics_registry( | ||
| cache_name: Cow<'static, str>, | ||
| capacity: NonZeroUsize, | ||
|
|
@@ -89,6 +97,23 @@ where | |
| Self::new_with_metrics_registry(cache_name, capacity, &mut default_registry()) | ||
| } | ||
|
|
||
| pub fn unbounded_without_metrics_registry(cache_name: Cow<'static, str>) -> Self { | ||
| Self::new_inner(cache_name, None) | ||
| } | ||
|
|
||
| pub fn unbounded_with_metrics_registry( | ||
| cache_name: Cow<'static, str>, | ||
| metrics_registry: &mut Registry, | ||
| ) -> Self { | ||
| let c = Self::unbounded_without_metrics_registry(cache_name); | ||
| c.register_metrics(metrics_registry); | ||
| c | ||
| } | ||
|
|
||
| pub fn unbounded_with_default_metrics_registry(cache_name: Cow<'static, str>) -> Self { | ||
| Self::unbounded_with_metrics_registry(cache_name, &mut default_registry()) | ||
| } | ||
|
|
||
| pub fn cache(&self) -> &Arc<RwLock<LruCache<K, V>>> { | ||
| &self.cache | ||
| } | ||
|
|
@@ -113,6 +138,10 @@ where | |
| self.cache.read().peek(k).cloned() | ||
| } | ||
|
|
||
| pub fn pop_lru(&self) -> Option<(K, V)> { | ||
| self.cache.write().pop_lru() | ||
| } | ||
|
|
||
| pub fn len(&self) -> usize { | ||
| self.cache.read().len() | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.