From 7f517640011a5dc2c811c66c2767ae58e27d0c79 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Mon, 13 May 2024 11:33:41 -0400 Subject: [PATCH] feat(pageserver): add metrics for aux file size (#7623) ref https://github.com/neondatabase/neon/issues/7443 ## Summary of changes This pull request adds a size estimator for aux files. Each timeline stores a cached `isize` for the estimated total size of aux files. It gets reset on basebackup, and gets updated for each aux file modification. TODO: print a warning when it exceeds the size limit. The size metric is not accurate. A race between `on_basebackup` and other functions could create a negative size estimate, but the chance is rare. In any case, this does not impose any extra I/Os on the storage, as everything is computed in-memory. The aux files are only stored on shard 0. As basebackups are only generated on shard 0, only shard 0 will report this metric. --------- Signed-off-by: Alex Chi Z --- pageserver/src/aux_file.rs | 52 +++++++++++++++++++++++++++++ pageserver/src/metrics.rs | 15 +++++++++ pageserver/src/pgdatadir_mapping.rs | 52 +++++++++++++++++++++-------- pageserver/src/tenant/timeline.rs | 32 ++++++++++++------ test_runner/fixtures/metrics.py | 1 + 5 files changed, 128 insertions(+), 24 deletions(-) diff --git a/pageserver/src/aux_file.rs b/pageserver/src/aux_file.rs index a26ed84a0d7d..e6d950487de0 100644 --- a/pageserver/src/aux_file.rs +++ b/pageserver/src/aux_file.rs @@ -1,3 +1,6 @@ +use std::sync::Arc; + +use ::metrics::IntGauge; use bytes::{Buf, BufMut, Bytes}; use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE}; use tracing::warn; @@ -140,6 +143,55 @@ pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result> { Ok(encoded) } +/// An estimation of the size of aux files. 
+pub struct AuxFileSizeEstimator { + aux_file_size_gauge: IntGauge, + size: Arc>>, +} + +impl AuxFileSizeEstimator { + pub fn new(aux_file_size_gauge: IntGauge) -> Self { + Self { + aux_file_size_gauge, + size: Arc::new(std::sync::Mutex::new(None)), + } + } + + pub fn on_base_backup(&self, new_size: usize) { + let mut guard = self.size.lock().unwrap(); + *guard = Some(new_size as isize); + self.report(new_size as isize); + } + + pub fn on_add(&self, file_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size += file_size as isize; + self.report(*size); + } + } + + pub fn on_remove(&self, file_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size -= file_size as isize; + self.report(*size); + } + } + + pub fn on_update(&self, old_size: usize, new_size: usize) { + let mut guard = self.size.lock().unwrap(); + if let Some(size) = &mut *guard { + *size += new_size as isize - old_size as isize; + self.report(*size); + } + } + + pub fn report(&self, size: isize) { + self.aux_file_size_gauge.set(size as i64); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 256f2f334c1d..b27bfb43b077 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -585,6 +585,15 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define current logical size metric") }); +static AUX_FILE_SIZE: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_aux_file_estimated_size", + "The size of all aux files for a timeline in aux file v2 store.", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + pub(crate) mod initial_logical_size { use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; use once_cell::sync::Lazy; @@ -2115,6 +2124,7 @@ pub(crate) struct TimelineMetrics { resident_physical_size_gauge: UIntGauge, /// 
copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, + pub aux_file_size_gauge: IntGauge, pub directory_entries_count_gauge: Lazy UIntGauge>>, pub evictions: IntCounter, pub evictions_with_low_residence_duration: std::sync::RwLock, @@ -2187,6 +2197,9 @@ impl TimelineMetrics { let current_logical_size_gauge = CURRENT_LOGICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let aux_file_size_gauge = AUX_FILE_SIZE + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065 let directory_entries_count_gauge_closure = { let tenant_shard_id = *tenant_shard_id; @@ -2224,6 +2237,7 @@ impl TimelineMetrics { last_record_gauge, resident_physical_size_gauge, current_logical_size_gauge, + aux_file_size_gauge, directory_entries_count_gauge, evictions, evictions_with_low_residence_duration: std::sync::RwLock::new( @@ -2264,6 +2278,7 @@ impl TimelineMetrics { let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]); } let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); self.evictions_with_low_residence_duration .write() diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index ffcab5f1409c..1092d64d33fe 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -699,13 +699,17 @@ impl Timeline { .await .context("scan")?; let mut result = HashMap::new(); + let mut sz = 0; for (_, v) in kv { let v = v.context("get value")?; let v = aux_file::decode_file_value_bytes(&v).context("value decode")?; for (fname, content) in v { + sz += fname.len(); + sz += content.len(); result.insert(fname, content); } } + self.aux_file_size_estimator.on_base_backup(sz); Ok(result) } @@ -1474,23 +1478,45 @@ 
impl<'a> DatadirModification<'a> { Err(PageReconstructError::MissingKey(_)) => None, Err(e) => return Err(e.into()), }; - let files = if let Some(ref old_val) = old_val { + let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val { aux_file::decode_file_value(old_val)? } else { Vec::new() }; - let new_files = if content.is_empty() { - files - .into_iter() - .filter(|(p, _)| &path != p) - .collect::>() - } else { - files - .into_iter() - .filter(|(p, _)| &path != p) - .chain(std::iter::once((path, content))) - .collect::>() - }; + let mut other_files = Vec::with_capacity(files.len()); + let mut modifying_file = None; + for file @ (p, content) in files { + if path == p { + assert!( + modifying_file.is_none(), + "duplicated entries found for {}", + path + ); + modifying_file = Some(content); + } else { + other_files.push(file); + } + } + let mut new_files = other_files; + match (modifying_file, content.is_empty()) { + (Some(old_content), false) => { + self.tline + .aux_file_size_estimator + .on_update(old_content.len(), content.len()); + new_files.push((path, content)); + } + (Some(old_content), true) => { + self.tline + .aux_file_size_estimator + .on_remove(old_content.len()); + // not adding the file key to the final `new_files` vec. 
+ } + (None, false) => { + self.tline.aux_file_size_estimator.on_add(content.len()); + new_files.push((path, content)); + } + (None, true) => anyhow::bail!("removing non-existing aux file: {}", path), + } let new_val = aux_file::encode_file_value(&new_files)?; self.put(key, Value::Image(new_val.into())); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d2fcd6c4a5d1..01f354b9e85d 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -61,9 +61,12 @@ use std::{ }; use crate::tenant::timeline::init::LocalLayerFileMetadata; -use crate::tenant::{ - layer_map::{LayerMap, SearchResult}, - metadata::TimelineMetadata, +use crate::{ + aux_file::AuxFileSizeEstimator, + tenant::{ + layer_map::{LayerMap, SearchResult}, + metadata::TimelineMetadata, + }, }; use crate::{ context::{DownloadBehavior, RequestContext}, @@ -409,6 +412,8 @@ pub struct Timeline { /// Keep aux directory cache to avoid it's reconstruction on each update pub(crate) aux_files: tokio::sync::Mutex, + + pub(crate) aux_file_size_estimator: AuxFileSizeEstimator, } pub struct WalReceiverInfo { @@ -2161,6 +2166,16 @@ impl Timeline { }; Arc::new_cyclic(|myself| { + let metrics = TimelineMetrics::new( + &tenant_shard_id, + &timeline_id, + crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( + "mtime", + evictions_low_residence_duration_metric_threshold, + ), + ); + let aux_file_metrics = metrics.aux_file_size_gauge.clone(); + let mut result = Timeline { conf, tenant_conf, @@ -2192,14 +2207,7 @@ impl Timeline { ancestor_timeline: ancestor, ancestor_lsn: metadata.ancestor_lsn(), - metrics: TimelineMetrics::new( - &tenant_shard_id, - &timeline_id, - crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( - "mtime", - evictions_low_residence_duration_metric_threshold, - ), - ), + metrics, query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new( &tenant_shard_id, @@ -2263,6 +2271,8 @@ impl Timeline { dir: None, n_deltas: 0, 
}), + + aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics), }; result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 7d34e12ca333..8fa67e75c9ab 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -149,6 +149,7 @@ def histogram(prefix_without_trailing_underscore: str) -> List[str]: "pageserver_storage_operations_seconds_sum_total", "pageserver_evictions_total", "pageserver_evictions_with_low_residence_duration_total", + "pageserver_aux_file_estimated_size", *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, # "pageserver_directory_entries_count", -- only used if above a certain threshold # "pageserver_broken_tenants_count" -- used only for broken