From 300555309c37c31ba11f18b3ad377355b0b5fe91 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 13 Sep 2023 17:37:45 +0100 Subject: [PATCH] pageserver: respect attachment mode for GC and compaction --- pageserver/src/tenant.rs | 17 +++++++ pageserver/src/tenant/config.rs | 79 ++++++++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 9c10409d8f568..cc54eec31474f 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1641,6 +1641,15 @@ impl Tenant { "Cannot run GC iteration on inactive tenant" ); + { + let conf = self.tenant_conf.read().unwrap(); + + if !conf.may_delete_layers() { + info!("Skipping GC in location state {:?}", conf); + return Ok(GcResult::default()); + } + } + self.gc_iteration_internal(target_timeline_id, horizon, pitr, ctx) .await } @@ -1659,6 +1668,14 @@ impl Tenant { "Cannot run compaction iteration on inactive tenant" ); + { + let conf = self.tenant_conf.read().unwrap(); + if !conf.may_delete_layers() || !conf.may_upload_layers() { + info!("Skipping compaction in location state {:?}", conf); + return Ok(()); + } + } + // Scan through the hashmap and collect a list of all the timelines, // while holding the lock. Then drop the lock and actually perform the // compactions. We don't want to block everything else while the diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 550e15c28e12a..5646cb98b41f2 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -64,6 +64,9 @@ pub(crate) enum AttachmentMode { pub(crate) struct AttachedLocationConfig { pub(crate) generation: Generation, pub(crate) attach_mode: AttachmentMode, + // TODO: add a flag to override AttachmentMode's policies under + // disk pressure (i.e. unblock uploads under disk pressure in Stale + // state, unblock deletions after timeout in Multi state) } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -80,7 +83,7 @@ pub(crate) enum LocationMode { /// Per-tenant, per-pageserver configuration. All pageservers use the same TenantConf, /// but have distinct LocationConf. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub(crate) struct LocationConf { /// The location-specific part of the configuration, describes the operating /// mode of this pageserver for this tenant. @@ -89,6 +92,23 @@ pub(crate) struct LocationConf { pub(crate) tenant_conf: TenantConfOpt, } +impl std::fmt::Debug for LocationConf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.mode { + LocationMode::Attached(conf) => { + write!( + f, + "Attached {:?}, gen={:?}", + conf.attach_mode, conf.generation + ) + } + LocationMode::Secondary(conf) => { + write!(f, "Secondary, warm={}", conf.warm) + } + } + } +} + impl LocationConf { pub(crate) fn new(tenant_conf: TenantConfOpt, generation: Generation) -> Self { Self { @@ -117,6 +137,63 @@ impl LocationConf { } } } + + /// Consult attachment mode to determine whether we are currently permitted + /// to delete layers. This is only advisory, not required for data safety. + /// See [`AttachmentMode`] for more context. + pub(crate) fn may_delete_layers(&self) -> bool { + // TODO: add an override for disk pressure in AttachedLocationConfig, + // and respect it here. + match &self.mode { + LocationMode::Attached(attach_conf) => { + match attach_conf.attach_mode { + AttachmentMode::Single => true, + AttachmentMode::Multi | AttachmentMode::Stale => { + // In Multi mode we avoid doing deletions because some other + // attached pageserver might get 404 while trying to read + // a layer we delete which is still referenced in their metadata. + // + // In Stale mode, we avoid doing deletions because we expect + // that they would ultimately fail validation in the deletion + // queue due to our stale generation. + false + } + } + } + LocationMode::Secondary(_) => { + // Do not expect to be called in this state + tracing::error!("Called may_delete_layers on a tenant in secondary mode"); + false + } + } + } + + /// Whether we are currently hinted that it is worthwhile to upload layers. + /// This is only advisory, not required for data safety. + /// See [`AttachmentMode`] for more context. + pub(crate) fn may_upload_layers(&self) -> bool { + // TODO: add an override for disk pressure in AttachedLocationConfig, + // and respect it here. + match &self.mode { + LocationMode::Attached(attach_conf) => { + match attach_conf.attach_mode { + AttachmentMode::Single | AttachmentMode::Multi => true, + AttachmentMode::Stale => { + // In Stale mode, we avoid dong uploads because we expect that + // our replacement pageserver will already have started its own + // IndexPart that will never reference layers we upload: it is + // wasteful. + false + } + } + } + LocationMode::Secondary(_) => { + // Do not expect to be called in this state + tracing::error!("Called may_upload_layers on a tenant in secondary mode"); + false + } + } + } } impl Default for LocationConf {