diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 90017b25f250..e3dab2fc1d96 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4488,13 +4488,17 @@ impl Tenant { let mut gc_cutoffs: HashMap = HashMap::with_capacity(timelines.len()); + // Ensures all timelines use the same start time when computing the time cutoff. + let now_ts_for_pitr_calc = SystemTime::now(); for timeline in timelines.iter() { let cutoff = timeline .get_last_record_lsn() .checked_sub(horizon) .unwrap_or(Lsn(0)); - let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?; + let cutoffs = timeline + .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx) + .await?; let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs); assert!(old.is_none()); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b36c2f487f59..c1b71262e029 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4859,6 +4859,7 @@ impl Timeline { async fn find_gc_time_cutoff( &self, + now: SystemTime, pitr: Duration, cancel: &CancellationToken, ctx: &RequestContext, @@ -4866,7 +4867,6 @@ impl Timeline { debug_assert_current_span_has_tenant_and_timeline_id(); if self.shard_identity.is_shard_zero() { // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself - let now = SystemTime::now(); let time_range = if pitr == Duration::ZERO { humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid") } else { @@ -4952,6 +4952,7 @@ impl Timeline { #[instrument(skip_all, fields(timeline_id=%self.timeline_id))] pub(super) async fn find_gc_cutoffs( &self, + now: SystemTime, space_cutoff: Lsn, pitr: Duration, cancel: &CancellationToken, @@ -4979,7 +4980,7 @@ impl Timeline { // - if PITR interval is set, then this is our cutoff. // - if PITR interval is not set, then we do a lookup // based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases. - let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?; + let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?; Ok(match (pitr, time_cutoff) { (Duration::ZERO, Some(time_cutoff)) => { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 94c65631b206..55cde8603e9f 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1799,6 +1799,24 @@ impl Timeline { Ok(()) } + /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for + /// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon). + /// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them + /// here. + pub(crate) fn get_gc_compaction_watermark(self: &Arc) -> Lsn { + let gc_cutoff_lsn = { + let gc_info = self.gc_info.read().unwrap(); + gc_info.min_cutoff() + }; + + // TODO: standby horizon should use leases so we don't really need to consider it here. + // let watermark = watermark.min(self.standby_horizon.load()); + + // TODO: ensure the child branches will not use anything below the watermark, or consider + // them when computing the watermark. + gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn()) + } + /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job. /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts @@ -1811,7 +1829,7 @@ impl Timeline { let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { job.compact_lsn_range.end } else { - *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff + self.get_gc_compaction_watermark() }; // Split compaction job to about 4GB each @@ -2006,7 +2024,7 @@ impl Timeline { // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of // cleaning everything that theoritically it could. In the future, it should use `self.gc_info` // to get the truth data. - let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn(); + let real_gc_cutoff = self.get_gc_compaction_watermark(); // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use // the real cutoff.