From 7b6636b321c8d53e6b5129ba6a30923173e95513 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Mon, 6 May 2024 12:02:06 -0400 Subject: [PATCH 1/4] feat(pageserver): add metrics for aux file size Signed-off-by: Alex Chi Z --- pageserver/src/aux_file.rs | 46 +++++++++++++++++++++++++ pageserver/src/context.rs | 6 ++++ pageserver/src/pgdatadir_mapping.rs | 52 +++++++++++++++++++++-------- pageserver/src/tenant/timeline.rs | 13 ++++++-- 4 files changed, 101 insertions(+), 16 deletions(-) diff --git a/pageserver/src/aux_file.rs b/pageserver/src/aux_file.rs index a26ed84a0d7d..982add927d59 100644 --- a/pageserver/src/aux_file.rs +++ b/pageserver/src/aux_file.rs @@ -1,3 +1,8 @@ +use std::sync::{ + atomic::{AtomicIsize, AtomicUsize}, + Arc, +}; + use bytes::{Buf, BufMut, Bytes}; use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE}; use tracing::warn; @@ -140,6 +145,47 @@ pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result> { Ok(encoded) } +/// An estimation of the size of aux files. +pub struct AuxFileSizeEstimator { + size: Arc>>, +} + +impl AuxFileSizeEstimator { + pub fn new() -> Self { + Self { + size: Arc::new(std::sync::Mutex::new(None)), + } + } + + pub fn on_base_backup(&self, new_size: usize) { + let mut guard = self.size.lock().unwrap(); + *guard = Some(new_size as isize); + } + + pub fn on_add(&self, file_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size += file_size as isize; + } + } + + pub fn on_remove(&self, file_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size -= file_size as isize; + } + } + + pub fn on_update(&self, old_size: usize, new_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size += new_size as isize - old_size as isize; + } + } + + pub fn report(&self) {} +} + #[cfg(test)] mod tests { use super::*; diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 86d0390c30b1..a76d720cfdfa 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -86,6 +86,8 @@ //! [`RequestContext`] argument. Functions in the middle of the call chain //! only need to pass it on. +use std::sync::atomic::AtomicUsize; + use crate::task_mgr::TaskKind; pub(crate) mod optional_counter; @@ -98,6 +100,8 @@ pub struct RequestContext { access_stats_behavior: AccessStatsBehavior, page_content_kind: PageContentKind, pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32, + /// Total number of kilobytes of layer files processed in this request. + pub vectored_access_delta_file_size_kb: AtomicUsize, } /// The kind of access to the page cache. @@ -154,6 +158,7 @@ impl RequestContextBuilder { access_stats_behavior: AccessStatsBehavior::Update, page_content_kind: PageContentKind::Unknown, micros_spent_throttled: Default::default(), + vectored_access_delta_file_size_kb: AtomicUsize::new(0), }, } } @@ -168,6 +173,7 @@ impl RequestContextBuilder { access_stats_behavior: original.access_stats_behavior, page_content_kind: original.page_content_kind, micros_spent_throttled: Default::default(), + vectored_access_delta_file_size_kb: AtomicUsize::new(0), }, } } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index a4215ee107b2..0c608c6a7f9f 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -699,13 +699,17 @@ impl Timeline { .await .context("scan")?; let mut result = HashMap::new(); + let mut sz = 0; for (_, v) in kv { let v = v.context("get value")?; let v = aux_file::decode_file_value_bytes(&v).context("value decode")?; for (fname, content) in v { result.insert(fname, content); + sz += fname.len(); + sz += content.len(); } } + self.aux_file_size_estimator.on_base_backup(sz); Ok(result) } @@ -1474,23 +1478,45 @@ impl<'a> DatadirModification<'a> { Err(PageReconstructError::MissingKey(_)) => None, Err(e) => return Err(e.into()), }; - let files = if let Some(ref old_val) = old_val { + let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val { aux_file::decode_file_value(old_val)? } else { Vec::new() }; - let new_files = if content.is_empty() { - files - .into_iter() - .filter(|(p, _)| &path != p) - .collect::>() - } else { - files - .into_iter() - .filter(|(p, _)| &path != p) - .chain(std::iter::once((path, content))) - .collect::>() - }; + let mut other_files = Vec::with_capacity(files.len()); + let mut modifying_file = None; + for file @ (p, content) in files { + if path == p { + assert!( + modifying_file.is_none(), + "duplicated entries found for {}", + path + ); + modifying_file = Some(content); + } else { + other_files.push(file); + } + } + let mut new_files = other_files; + match (modifying_file, content.is_empty()) { + (Some(old_content), false) => { + self.tline + .aux_file_size_estimator + .on_update(old_content.len(), content.len()); + new_files.push((path, content)); + } + (Some(old_content), true) => { + self.tline + .aux_file_size_estimator + .on_remove(old_content.len()); + // not adding the file key to the final `new_files` vec. + } + (None, false) => { + self.tline.aux_file_size_estimator.on_add(content.len()); + new_files.push((path, content)); + } + (None, true) => anyhow::bail!("removing non-existing aux file: {}", path), + } let new_val = aux_file::encode_file_value(&new_files)?; self.put(key, Value::Image(new_val.into())); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d6d012c70c35..ce22f003525b 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -61,9 +61,12 @@ use std::{ }; use crate::tenant::storage_layer::layer::local_layer_path; -use crate::tenant::{ - layer_map::{LayerMap, SearchResult}, - metadata::TimelineMetadata, +use crate::{ + aux_file::AuxFileSizeEstimator, + tenant::{ + layer_map::{LayerMap, SearchResult}, + metadata::TimelineMetadata, + }, }; use crate::{ context::{DownloadBehavior, RequestContext}, @@ -409,6 +412,8 @@ pub struct Timeline { /// Keep aux directory cache to avoid it's reconstruction on each update pub(crate) aux_files: tokio::sync::Mutex, + + pub(crate) aux_file_size_estimator: AuxFileSizeEstimator, } pub struct WalReceiverInfo { @@ -2257,6 +2262,8 @@ impl Timeline { dir: None, n_deltas: 0, }), + + aux_file_size_estimator: AuxFileSizeEstimator::new(), }; result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; From 87dbd04aa11654022804bd1c2eab4f8d075d3bb8 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Tue, 7 May 2024 16:07:54 -0400 Subject: [PATCH 2/4] report to prometheus Signed-off-by: Alex Chi Z --- pageserver/src/aux_file.rs | 18 ++++++++++++------ pageserver/src/metrics.rs | 15 +++++++++++++++ pageserver/src/pgdatadir_mapping.rs | 2 +- pageserver/src/tenant/timeline.rs | 21 ++++++++++++--------- test_runner/fixtures/metrics.py | 1 + 5 files changed, 41 insertions(+), 16 deletions(-) diff --git a/pageserver/src/aux_file.rs b/pageserver/src/aux_file.rs index 982add927d59..e6d950487de0 100644 --- a/pageserver/src/aux_file.rs +++ b/pageserver/src/aux_file.rs @@ -1,8 +1,6 @@ -use std::sync::{ - atomic::{AtomicIsize, AtomicUsize}, - Arc, -}; +use std::sync::Arc; +use ::metrics::IntGauge; use bytes::{Buf, BufMut, Bytes}; use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE}; use tracing::warn; @@ -147,12 +145,14 @@ pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result> { /// An estimation of the size of aux files. pub struct AuxFileSizeEstimator { + aux_file_size_gauge: IntGauge, size: Arc>>, } impl AuxFileSizeEstimator { - pub fn new() -> Self { + pub fn new(aux_file_size_gauge: IntGauge) -> Self { Self { + aux_file_size_gauge, size: Arc::new(std::sync::Mutex::new(None)), } } @@ -160,12 +160,14 @@ impl AuxFileSizeEstimator { pub fn on_base_backup(&self, new_size: usize) { let mut guard = self.size.lock().unwrap(); *guard = Some(new_size as isize); + self.report(new_size as isize); } pub fn on_add(&self, file_size: usize) { let mut guard = self.size.lock().unwrap(); if let Some(size) = &mut *guard { *size += file_size as isize; + self.report(*size); } } @@ -173,6 +175,7 @@ impl AuxFileSizeEstimator { let mut guard = self.size.lock().unwrap(); if let Some(size) = &mut *guard { *size -= file_size as isize; + self.report(*size); } } @@ -180,10 +183,13 @@ impl AuxFileSizeEstimator { let mut guard = self.size.lock().unwrap(); if let Some(size) = &mut *guard { *size += new_size as isize - old_size as isize; + self.report(*size); } } - pub fn report(&self) {} + pub fn report(&self, size: isize) { + self.aux_file_size_gauge.set(size as i64); + } } #[cfg(test)] diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 256f2f334c1d..b27bfb43b077 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -585,6 +585,15 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define current logical size metric") }); +static AUX_FILE_SIZE: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_aux_file_estimated_size", + "The size of all aux files for a timeline in aux file v2 store.", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + pub(crate) mod initial_logical_size { use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; use once_cell::sync::Lazy; @@ -2115,6 +2124,7 @@ pub(crate) struct TimelineMetrics { resident_physical_size_gauge: UIntGauge, /// copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, + pub aux_file_size_gauge: IntGauge, pub directory_entries_count_gauge: Lazy UIntGauge>>, pub evictions: IntCounter, pub evictions_with_low_residence_duration: std::sync::RwLock, @@ -2187,6 +2197,9 @@ impl TimelineMetrics { let current_logical_size_gauge = CURRENT_LOGICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let aux_file_size_gauge = AUX_FILE_SIZE + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065 let directory_entries_count_gauge_closure = { let tenant_shard_id = *tenant_shard_id; @@ -2224,6 +2237,7 @@ impl TimelineMetrics { last_record_gauge, resident_physical_size_gauge, current_logical_size_gauge, + aux_file_size_gauge, directory_entries_count_gauge, evictions, evictions_with_low_residence_duration: std::sync::RwLock::new( @@ -2264,6 +2278,7 @@ impl TimelineMetrics { let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]); } let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); self.evictions_with_low_residence_duration .write() diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 0c608c6a7f9f..1c90b89d0026 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -704,9 +704,9 @@ impl Timeline { let v = v.context("get value")?; let v = aux_file::decode_file_value_bytes(&v).context("value decode")?; for (fname, content) in v { - result.insert(fname, content); sz += fname.len(); sz += content.len(); + result.insert(fname, content); } } self.aux_file_size_estimator.on_base_backup(sz); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index ce22f003525b..1b28b1116866 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2160,6 +2160,16 @@ impl Timeline { }; Arc::new_cyclic(|myself| { + let metrics = TimelineMetrics::new( + &tenant_shard_id, + &timeline_id, + crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( + "mtime", + evictions_low_residence_duration_metric_threshold, + ), + ); + let aux_file_metrics = metrics.aux_file_size_gauge.clone(); + let mut result = Timeline { conf, tenant_conf, @@ -2191,14 +2201,7 @@ impl Timeline { ancestor_timeline: ancestor, ancestor_lsn: metadata.ancestor_lsn(), - metrics: TimelineMetrics::new( - &tenant_shard_id, - &timeline_id, - crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( - "mtime", - evictions_low_residence_duration_metric_threshold, - ), - ), + metrics, query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new( &tenant_shard_id, @@ -2263,7 +2266,7 @@ impl Timeline { n_deltas: 0, }), - aux_file_size_estimator: AuxFileSizeEstimator::new(), + aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics), }; result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 7d34e12ca333..8fa67e75c9ab 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -149,6 +149,7 @@ def histogram(prefix_without_trailing_underscore: str) -> List[str]: "pageserver_storage_operations_seconds_sum_total", "pageserver_evictions_total", "pageserver_evictions_with_low_residence_duration_total", + "pageserver_aux_file_estimated_size", *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, # "pageserver_directory_entries_count", -- only used if above a certain threshold # "pageserver_broken_tenants_count" -- used only for broken From 13977eae249f82ec38cdaaf2573adfabac971003 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Thu, 9 May 2024 10:30:17 -0400 Subject: [PATCH 3/4] remove unused fields Signed-off-by: Alex Chi Z --- pageserver/src/context.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index a76d720cfdfa..46d068896bfe 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -100,8 +100,6 @@ pub struct RequestContext { access_stats_behavior: AccessStatsBehavior, page_content_kind: PageContentKind, pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32, - /// Total number of kilobytes of layer files processed in this request. - pub vectored_access_delta_file_size_kb: AtomicUsize, } /// The kind of access to the page cache. @@ -158,7 +156,6 @@ impl RequestContextBuilder { access_stats_behavior: AccessStatsBehavior::Update, page_content_kind: PageContentKind::Unknown, micros_spent_throttled: Default::default(), - vectored_access_delta_file_size_kb: AtomicUsize::new(0), }, } } @@ -173,7 +170,6 @@ impl RequestContextBuilder { access_stats_behavior: original.access_stats_behavior, page_content_kind: original.page_content_kind, micros_spent_throttled: Default::default(), - vectored_access_delta_file_size_kb: AtomicUsize::new(0), }, } } From 24ae9b28e972af3840c0b7329ca090e6a70c5014 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Thu, 9 May 2024 10:32:23 -0400 Subject: [PATCH 4/4] fix clippy Signed-off-by: Alex Chi Z --- pageserver/src/context.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 46d068896bfe..86d0390c30b1 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -86,8 +86,6 @@ //! [`RequestContext`] argument. Functions in the middle of the call chain //! only need to pass it on. -use std::sync::atomic::AtomicUsize; - use crate::task_mgr::TaskKind; pub(crate) mod optional_counter;