From eae4470bb61fabb95a3eb7b1fd00087f89318d1a Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 4 Oct 2024 19:07:39 +0300 Subject: [PATCH] safekeeper: remove local WAL files ignoring peer_horizon_lsn. (#8900) If a peer safekeeper needs a garbage-collected segment, it will now be fetched from s3 using on-demand WAL download. This reduces the danger of running out of disk space when a safekeeper fails. --- safekeeper/src/remove_wal.rs | 28 ++++++++++++++++++---------- safekeeper/src/timeline_manager.rs | 4 ++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/safekeeper/src/remove_wal.rs b/safekeeper/src/remove_wal.rs index 16239d847ba4..df3ba9eb087b 100644 --- a/safekeeper/src/remove_wal.rs +++ b/safekeeper/src/remove_wal.rs @@ -2,21 +2,29 @@ use utils::lsn::Lsn; use crate::timeline_manager::StateSnapshot; -/// Get oldest LSN we still need to keep. We hold WAL till it is consumed -/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3 -/// offloading. -/// While it is safe to use inmem values for determining horizon, -/// we use persistent to make possible normal states less surprising. -/// All segments covering LSNs before horizon_lsn can be removed. +/// Get oldest LSN we still need to keep. +/// +/// We hold WAL till it is consumed by +/// 1) pageserver (remote_consistent_lsn) +/// 2) s3 offloading. +/// 3) Additionally we must store WAL since last local commit_lsn because +/// that's where we start looking for last WAL record on start. +/// +/// If some peer safekeeper misses data it will fetch it from the remote +/// storage. While it is safe to use inmem values for determining horizon, we +/// use persistent to make possible normal states less surprising. All segments +/// covering LSNs before horizon_lsn can be removed. 
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn { use std::cmp::min; - let mut horizon_lsn = min( - state.cfile_remote_consistent_lsn, - state.cfile_peer_horizon_lsn, - ); + let mut horizon_lsn = state.cfile_remote_consistent_lsn; // we don't want to remove WAL that is not yet offloaded to s3 horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn); + // Min by local commit_lsn to be able to begin reading WAL from somewhere on + // sk start. Technically we don't allow local commit_lsn to be higher than + // flush_lsn, but let's be double safe by including it as well. + horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn); + horizon_lsn = min(horizon_lsn, state.flush_lsn); if let Some(extra_horizon_lsn) = extra_horizon_lsn { horizon_lsn = min(horizon_lsn, extra_horizon_lsn); } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index 6be75479db7f..f5535c0ceaa4 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -47,7 +47,7 @@ pub(crate) struct StateSnapshot { pub(crate) remote_consistent_lsn: Lsn, // persistent control file values - pub(crate) cfile_peer_horizon_lsn: Lsn, + pub(crate) cfile_commit_lsn: Lsn, pub(crate) cfile_remote_consistent_lsn: Lsn, pub(crate) cfile_backup_lsn: Lsn, @@ -70,7 +70,7 @@ impl StateSnapshot { commit_lsn: state.inmem.commit_lsn, backup_lsn: state.inmem.backup_lsn, remote_consistent_lsn: state.inmem.remote_consistent_lsn, - cfile_peer_horizon_lsn: state.peer_horizon_lsn, + cfile_commit_lsn: state.commit_lsn, cfile_remote_consistent_lsn: state.remote_consistent_lsn, cfile_backup_lsn: state.backup_lsn, flush_lsn: read_guard.sk.flush_lsn(),