-
Notifications
You must be signed in to change notification settings - Fork 196
fix(gc): schedule automatic GC only when the node is in sync #6165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,7 @@ pub struct SnapshotGarbageCollector<DB> { | |
| running: AtomicBool, | ||
| blessed_lite_snapshot: RwLock<Option<PathBuf>>, | ||
| db: RwLock<Option<Arc<DB>>>, | ||
| sync_status: RwLock<Option<Arc<RwLock<crate::chain_sync::SyncStatusReport>>>>, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you think this will be better here?
This way we can avoid doing:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's set more than once after every GC
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I missed that this GC restarts the node.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need two
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we simplify this type?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @elmattic simplified type by adding |
||
| // On mainnet, it takes ~50MiB-200MiB RAM, depending on the time cost of snapshot export | ||
| memory_db: RwLock<Option<HashMap<Cid, Vec<u8>>>>, | ||
| memory_db_head_key: RwLock<Option<TipsetKey>>, | ||
|
|
@@ -111,6 +112,7 @@ where | |
| running: AtomicBool::new(false), | ||
| blessed_lite_snapshot: RwLock::new(None), | ||
| db: RwLock::new(None), | ||
| sync_status: RwLock::new(None), | ||
| memory_db: RwLock::new(None), | ||
| memory_db_head_key: RwLock::new(None), | ||
| exported_head_key: RwLock::new(None), | ||
|
|
@@ -132,6 +134,10 @@ where | |
| *self.car_db_head_epoch.write() = Some(epoch); | ||
| } | ||
|
|
||
| pub fn set_sync_status(&self, sync_status: Arc<RwLock<crate::chain_sync::SyncStatusReport>>) { | ||
| *self.sync_status.write() = Some(sync_status) | ||
| } | ||
|
|
||
| pub async fn event_loop(&self) { | ||
| while self.trigger_rx.recv_async().await.is_ok() { | ||
| if self.running.load(Ordering::Relaxed) { | ||
|
akaladarshi marked this conversation as resolved.
|
||
|
|
@@ -170,18 +176,22 @@ where | |
| ); | ||
| loop { | ||
| if !self.running.load(Ordering::Relaxed) | ||
| && let Some(db) = &*self.db.read() | ||
| && let Some(car_db_head_epoch) = *self.car_db_head_epoch.read() | ||
| && let Ok(head_key) = HeaviestTipsetKeyProvider::heaviest_tipset_key(db) | ||
| && let Ok(head) = Tipset::load_required(db, &head_key) | ||
| && let Some(sync_status) = &*self.sync_status.read() | ||
| { | ||
| let head_epoch = head.epoch(); | ||
| if head_epoch - car_db_head_epoch >= snap_gc_interval_epochs | ||
| const IN_SYNC_EPOCH_THRESHOLD: i64 = 2; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I notice
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a potential bug
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is because there we are using time diff only:
```rust
let time_diff = now_ts.saturating_sub(heaviest.min_timestamp());
if time_diff < seconds_per_epoch as u64 * SYNCED_EPOCH_THRESHOLD {
    NodeSyncStatus::Synced
} else {
    NodeSyncStatus::Syncing
}
```
But you can use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed!
akaladarshi marked this conversation as resolved.
Outdated
|
||
| let sync_status = &*sync_status.read(); | ||
| let network_head_epoch = sync_status.network_head_epoch; | ||
| let head_epoch = sync_status.current_head_epoch; | ||
| if head_epoch > 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this test condition is quite a beast, maybe it's time to break it down into functions and add some comments.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've added a comment to each condition |
||
| && head_epoch <= network_head_epoch | ||
| && head_epoch + IN_SYNC_EPOCH_THRESHOLD >= network_head_epoch | ||
| && head_epoch - car_db_head_epoch >= snap_gc_interval_epochs | ||
| && self.trigger_tx.try_send(()).is_ok() | ||
| { | ||
| tracing::info!(%car_db_head_epoch, %head_epoch, %snap_gc_interval_epochs, "Snap GC scheduled"); | ||
| tracing::info!(%car_db_head_epoch, %head_epoch, %network_head_epoch, %snap_gc_interval_epochs, "Snap GC scheduled"); | ||
| } else { | ||
| tracing::trace!(%car_db_head_epoch, %head_epoch, %snap_gc_interval_epochs, "Snap GC not scheduled"); | ||
| tracing::debug!(%car_db_head_epoch, %head_epoch, %network_head_epoch, %snap_gc_interval_epochs, "Snap GC not scheduled"); | ||
| } | ||
| } | ||
| tokio::time::sleep(snap_gc_check_interval).await; | ||
|
|
@@ -219,6 +229,7 @@ where | |
| } | ||
| map | ||
| }); | ||
| let start = Instant::now(); | ||
| let (head_ts, _) = crate::chain::export_from_head::<Sha256>( | ||
| &db, | ||
| self.recent_state_roots, | ||
|
|
@@ -235,7 +246,11 @@ where | |
| head_ts.epoch() | ||
| )); | ||
| temp_path.persist(&target_path)?; | ||
| tracing::info!("exported lite snapshot at {}", target_path.display()); | ||
| tracing::info!( | ||
| "exported lite snapshot at {}, took {}", | ||
| target_path.display(), | ||
| humantime::format_duration(start.elapsed()) | ||
| ); | ||
| *self.blessed_lite_snapshot.write() = Some(target_path); | ||
| *self.exported_head_key.write() = Some(head_ts.key().clone()); | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.