From 9f097af0fcc57413190ed4468c0777a278ec0e55 Mon Sep 17 00:00:00 2001
From: Vlad Lazar
Date: Wed, 4 Dec 2024 12:33:53 +0100
Subject: [PATCH] pageserver: add disk consistent and remote lsn metrics

Add two per-timeline integer gauges, both labelled by `tenant_id`,
`shard_id` and `timeline_id`:

* `pageserver_disk_consistent_lsn` is held in `TimelineMetrics` as
  `disk_consistent_lsn_gauge` and is set in
  `Timeline::set_disk_consistent_lsn` to the offered value, after the
  existing monotonicity assertion.
* `pageserver_projected_remote_consistent_lsn` is set in
  `RemoteTimelineClient` to the `disk_consistent_lsn` of the upload
  queue's clean index metadata, right after the clean state is updated.
  The gauge is looked up by label values at the call site rather than
  being cached in `TimelineMetrics`.

Both gauges store the LSN through an `as i64` cast.

---
Note for reviewers: the hunks below only register and set the gauges.
Nothing in this patch removes the new label sets when a timeline goes
away; please confirm whether that cleanup is handled elsewhere.

 pageserver/src/metrics.rs                     | 24 +++++++++++++++++++
 .../src/tenant/remote_timeline_client.rs      |  8 +++++++
 pageserver/src/tenant/timeline.rs             |  4 ++++
 3 files changed, 36 insertions(+)

diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index e5707356110f..f2f00dc67395 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -464,6 +464,24 @@ static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
     .expect("failed to define a metric")
 });
 
+static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
+        "pageserver_disk_consistent_lsn",
+        "Disk consistent LSN grouped by timeline",
+        &["tenant_id", "shard_id", "timeline_id"]
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
+        "pageserver_projected_remote_consistent_lsn",
+        "Projected remote consistent LSN grouped by timeline",
+        &["tenant_id", "shard_id", "timeline_id"]
+    )
+    .expect("failed to define a metric")
+});
+
 static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     register_uint_gauge_vec!(
         "pageserver_pitr_history_size",
@@ -2395,6 +2413,7 @@ pub(crate) struct TimelineMetrics {
     pub garbage_collect_histo: StorageTimeMetrics,
     pub find_gc_cutoffs_histo: StorageTimeMetrics,
     pub last_record_lsn_gauge: IntGauge,
+    pub disk_consistent_lsn_gauge: IntGauge,
     pub pitr_history_size: UIntGauge,
     pub archival_size: UIntGauge,
     pub(crate) layer_size_image: UIntGauge,
@@ -2479,6 +2498,10 @@ impl TimelineMetrics {
             .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
 
+        let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
+            .unwrap();
+
         let pitr_history_size = PITR_HISTORY_SIZE
             .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
@@ -2579,6 +2602,7 @@ impl TimelineMetrics {
             find_gc_cutoffs_histo,
             load_layer_map_histo,
             last_record_lsn_gauge,
+            disk_consistent_lsn_gauge,
             pitr_history_size,
             archival_size,
             layer_size_image,
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 007bd3eef083..94f69256d74a 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -2190,6 +2190,14 @@ impl RemoteTimelineClient {
                 upload_queue.clean.1 = Some(task.task_id);
 
                 let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn();
+                crate::metrics::PROJECTED_REMOTE_CONSISTENT_LSN
+                    .get_metric_with_label_values(&[
+                        &self.tenant_shard_id.tenant_id.to_string(),
+                        &format!("{}", self.tenant_shard_id.shard_slug()),
+                        &self.timeline_id.to_string(),
+                    ])
+                    .unwrap()
+                    .set(lsn.0 as i64);
 
                 if self.generation.is_none() {
                     // Legacy mode: skip validating generation
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 02a839a83bce..444c0b762384 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -3850,6 +3850,10 @@ impl Timeline {
     fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool {
         let old_value = self.disk_consistent_lsn.fetch_max(new_value);
         assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}");
+
+        self.metrics
+            .disk_consistent_lsn_gauge
+            .set(new_value.0 as i64);
         new_value != old_value
     }
 