From 88f7d0515b824718a090844319643d20ad8b1e66 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 12 Dec 2024 13:26:19 +0100 Subject: [PATCH] pageserver: add `disk_compacted_lsn` --- docs/glossary.md | 1 + libs/pageserver_api/src/models.rs | 10 ++++-- libs/utils/src/lsn.rs | 11 ++++++- pageserver/src/http/openapi_spec.yml | 3 ++ pageserver/src/http/routes.rs | 1 + pageserver/src/metrics.rs | 16 +++++++++ .../tenant/storage_layer/inmemory_layer.rs | 2 +- pageserver/src/tenant/timeline.rs | 33 +++++++++++++++++++ 8 files changed, 73 insertions(+), 4 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index 25c66828c076..d940b8608446 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -121,6 +121,7 @@ Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) Neon pageserver LSNs: * `last_record_lsn` - the end of last processed WAL record. * `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN. +* `disk_compacted_lsn` - data is known to be compacted to L1 on local disk up to this LSN. * `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash. TODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context. * `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 5690b643f062..23a8347460a8 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -983,12 +983,18 @@ pub struct TimelineInfo { pub last_record_lsn: Lsn, pub prev_record_lsn: Option<Lsn>, pub latest_gc_cutoff_lsn: Lsn, + + /// The LSN that has been flushed to local disk. pub disk_consistent_lsn: Lsn, - /// The LSN that we have succesfully uploaded to remote storage + /// The LSN that has been compacted down to L1 on local disk. 
+ pub disk_compacted_lsn: Lsn, + + /// The LSN that we have successfully uploaded to remote storage, according to + /// our generation. pub remote_consistent_lsn: Lsn, - /// The LSN that we are advertizing to safekeepers + /// The LSN that we are advertising to safekeepers, with verified generation. pub remote_consistent_lsn_visible: Lsn, /// The LSN from the start of the root timeline (never changes) diff --git a/libs/utils/src/lsn.rs b/libs/utils/src/lsn.rs index f18816560062..eb5704e6757e 100644 --- a/libs/utils/src/lsn.rs +++ b/libs/utils/src/lsn.rs @@ -2,7 +2,7 @@ use serde::{de::Visitor, Deserialize, Serialize}; use std::fmt; -use std::ops::{Add, AddAssign}; +use std::ops::{Add, AddAssign, Sub}; use std::str::FromStr; use std::sync::atomic::{AtomicU64, Ordering}; @@ -295,6 +295,15 @@ impl AddAssign<u64> for Lsn { } } +impl Sub<u64> for Lsn { + type Output = Lsn; + + fn sub(self, other: u64) -> Self::Output { + // panic if the subtraction underflows. + Lsn(self.0.checked_sub(other).unwrap()) + } +} + /// An [`Lsn`] that can be accessed atomically. 
pub struct AtomicLsn { inner: AtomicU64, diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index ee43440534e8..d053628d0a64 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -1050,6 +1050,9 @@ components: disk_consistent_lsn: type: string format: hex + disk_compacted_lsn: + type: string + format: hex remote_consistent_lsn: type: string format: hex diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 6e9ee976f41e..52f264ca7a54 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -480,6 +480,7 @@ async fn build_timeline_info_common( timeline_id: timeline.timeline_id, ancestor_timeline_id, ancestor_lsn, + disk_compacted_lsn: timeline.get_disk_compacted_lsn().await.unwrap_or(Lsn(0)), disk_consistent_lsn: timeline.get_disk_consistent_lsn(), remote_consistent_lsn: remote_consistent_lsn_projected, remote_consistent_lsn_visible, diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b4e20cb8b90e..20b79344aae8 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -472,6 +472,15 @@ static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| { .expect("failed to define a metric") }); +static DISK_COMPACTED_LSN: Lazy<IntGaugeVec> = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_disk_compacted_lsn", + "Disk compacted LSN grouped by timeline", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_projected_remote_consistent_lsn", @@ -2596,6 +2605,7 @@ pub(crate) struct TimelineMetrics { pub find_gc_cutoffs_histo: StorageTimeMetrics, pub last_record_lsn_gauge: IntGauge, pub disk_consistent_lsn_gauge: IntGauge, + pub disk_compacted_lsn_gauge: IntGauge, pub pitr_history_size: UIntGauge, pub archival_size: UIntGauge, pub(crate) layer_size_image: UIntGauge, @@ -2685,6 
+2695,10 @@ impl TimelineMetrics { .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let disk_compacted_lsn_gauge = DISK_COMPACTED_LSN + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + let pitr_history_size = PITR_HISTORY_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); @@ -2790,6 +2804,7 @@ impl TimelineMetrics { load_layer_map_histo, last_record_lsn_gauge, disk_consistent_lsn_gauge, + disk_compacted_lsn_gauge, pitr_history_size, archival_size, layer_size_image, @@ -2855,6 +2870,7 @@ impl TimelineMetrics { let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = DISK_COMPACTED_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 71e53da20f7f..ea5d377e0359 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -53,7 +53,7 @@ pub struct InMemoryLayer { /// This layer contains all the changes from 'start_lsn'. The /// start is inclusive. - start_lsn: Lsn, + pub(crate) start_lsn: Lsn, /// Frozen layers have an exclusive end LSN. /// Writes are only allowed when this is `None`. 
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 8f1d5f6577a6..f6531d8c4611 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1266,6 +1266,27 @@ impl Timeline { self.remote_client.remote_consistent_lsn_visible() } + /// Returns the latest LSN that has been compacted down into L1 on disk, or None on shutdown. + /// LSNs above it are likely in L0 delta layers. Note that these may not yet be uploaded to S3. + /// + /// TODO: consider tracking this explicitly instead of recomputing every time. + pub(crate) async fn get_disk_compacted_lsn(&self) -> Option<Lsn> { + let layers = self.layers.read().await; + let Ok(layermap) = layers.layer_map() else { + return None; + }; + // Find the lowest LSN in a frozen or L0 delta layer, otherwise use the disk LSN. + // TODO: check that this is correct. + let frozen_lsns = layermap.frozen_layers.iter().map(|l| l.start_lsn); + let delta_lsns = layermap.level0_deltas().iter().map(|l| l.lsn_range.start); + let compacted_lsn = frozen_lsns + .chain(delta_lsns) + .min() + .map(|lsn| lsn.saturating_sub(Lsn(1))) + .unwrap_or(self.disk_consistent_lsn.load()); + Some(compacted_lsn) + } + /// The sum of the file size of all historic layers in the layer map. /// This method makes no distinction between local and remote layers. /// Hence, the result **does not represent local filesystem usage**. @@ -2692,6 +2713,14 @@ impl Timeline { num_layers, disk_consistent_lsn, total_physical_size ); + // TODO: consider finding a better place for this. + self.metrics.disk_compacted_lsn_gauge.set( + self.get_disk_compacted_lsn() + .await + .expect("layermanager must be open during init") + .0 as i64, + ); + timer.stop_and_record(); Ok(()) } @@ -4823,6 +4852,10 @@ impl Timeline { drop_wlock(guard); + if let Some(lsn) = self.get_disk_compacted_lsn().await { + self.metrics.disk_compacted_lsn_gauge.set(lsn.0 as i64); + } + Ok(()) }