Skip to content

Commit

Permalink
fix(pageserver): ensure GC computes time cutoff using the same start …
Browse files Browse the repository at this point in the history
…time

Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh committed Dec 18, 2024
1 parent 61fcf64 commit 4c7f881
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4482,13 +4482,17 @@ impl Tenant {
let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
HashMap::with_capacity(timelines.len());

// Ensures all timelines use the same start time when computing the time cutoff.
let now = SystemTime::now();
for timeline in timelines.iter() {
let cutoff = timeline
.get_last_record_lsn()
.checked_sub(horizon)
.unwrap_or(Lsn(0));

let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?;
let cutoffs = timeline
.find_gc_cutoffs(now, cutoff, pitr, cancel, ctx)
.await?;
let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
assert!(old.is_none());
}
Expand Down
5 changes: 3 additions & 2 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4837,14 +4837,14 @@ impl Timeline {

async fn find_gc_time_cutoff(
&self,
now: SystemTime,
pitr: Duration,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<Option<Lsn>, PageReconstructError> {
debug_assert_current_span_has_tenant_and_timeline_id();
if self.shard_identity.is_shard_zero() {
// Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
let now = SystemTime::now();
let time_range = if pitr == Duration::ZERO {
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
} else {
Expand Down Expand Up @@ -4930,6 +4930,7 @@ impl Timeline {
#[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
pub(super) async fn find_gc_cutoffs(
&self,
now: SystemTime,
space_cutoff: Lsn,
pitr: Duration,
cancel: &CancellationToken,
Expand Down Expand Up @@ -4957,7 +4958,7 @@ impl Timeline {
// - if PITR interval is set, then this is our cutoff.
// - if PITR interval is not set, then we do a lookup
// based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;
let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?;

Ok(match (pitr, time_cutoff) {
(Duration::ZERO, Some(time_cutoff)) => {
Expand Down
20 changes: 18 additions & 2 deletions pageserver/src/tenant/timeline/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1798,6 +1798,22 @@ impl Timeline {
Ok(())
}

/// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for
/// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon).
/// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to compute it
/// here.
pub(crate) fn get_gc_compaction_watermark(self: &Arc<Self>) -> Lsn {
let gc_cutoff_lsn = {
let gc_info = self.gc_info.read().unwrap();
gc_info.min_cutoff()
};
let watermark = gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn());
let watermark = watermark.min(self.standby_horizon.load());
// TODO: ensure the child branches will not use anything below the watermark, or consider
// them when computing the watermark.
watermark
}

/// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.
/// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN
/// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts
Expand All @@ -1810,7 +1826,7 @@ impl Timeline {
let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
job.compact_lsn_range.end
} else {
*self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
self.get_gc_compaction_watermark()
};

// Split compaction job to about 4GB each
Expand Down Expand Up @@ -2005,7 +2021,7 @@ impl Timeline {
// Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of
// cleaning everything that theoritically it could. In the future, it should use `self.gc_info`
// to get the truth data.
let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn();
let real_gc_cutoff = self.get_gc_compaction_watermark();
// The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for
// each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
// the real cutoff.
Expand Down

0 comments on commit 4c7f881

Please sign in to comment.