From 8d427ea1694d3f398efff7aac1d4578e52cde6a6 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 29 Nov 2023 16:55:13 +0000 Subject: [PATCH] logical size: better represent level of accuracy in the type system I would love to not expose the in-accurate value int he mgmt API at all, and in fact control plane doesn't use it [^1]. But our tests do, and I have no desire to change them at this time. [^1]: https://github.com/neondatabase/cloud/pull/8317 --- libs/pageserver_api/src/models.rs | 4 +- pageserver/src/consumption_metrics/metrics.rs | 13 ++--- pageserver/src/http/routes.rs | 14 +++--- pageserver/src/tenant/timeline.rs | 15 +++--- .../src/tenant/timeline/logical_size.rs | 49 +++++++++++++++---- .../walreceiver/walreceiver_connection.rs | 7 +-- 6 files changed, 63 insertions(+), 39 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index c792a5eff769..539b0f4c869d 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -400,7 +400,9 @@ pub struct TimelineInfo { /// The LSN that we are advertizing to safekeepers pub remote_consistent_lsn_visible: Lsn, - pub current_logical_size: Option, // is None when timeline is Unloaded + pub current_logical_size: u64, + pub current_logical_size_is_accurate: bool, + /// Sum of the size of all layer files. /// If a layer is present in both local FS and S3, it counts only once. pub current_physical_size: Option, // is None when timeline is Unloaded diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 2989e15e8eaa..ad5a564259b2 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -1,5 +1,4 @@ -use crate::context::RequestContext; -use anyhow::Context; +use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize}; use chrono::{DateTime, Utc}; use consumption_metrics::EventType; use futures::stream::StreamExt; @@ -352,13 +351,11 @@ impl TimelineSnapshot { let current_exact_logical_size = { let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_id, timeline_id = %t.timeline_id); - let res = span - .in_scope(|| t.get_current_logical_size(ctx)) - .context("get_current_logical_size"); - match res? { + let size = span.in_scope(|| t.get_current_logical_size(ctx)); + match size { // Only send timeline logical size when it is fully calculated. - (size, is_exact) if is_exact => Some(size), - (_, _) => None, + CurrentLogicalSize::Exact(ref size) => Some(size.into()), + CurrentLogicalSize::Approximate(_) => None, } }; diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 5ce09500ee81..42084a2340fc 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -337,13 +337,7 @@ async fn build_timeline_info_common( Lsn(0) => None, lsn @ Lsn(_) => Some(lsn), }; - let current_logical_size = match timeline.get_current_logical_size(ctx) { - Ok((size, _)) => Some(size), - Err(err) => { - error!("Timeline info creation failed to get current logical size: {err:?}"); - None - } - }; + let current_logical_size = timeline.get_current_logical_size(ctx); let current_physical_size = Some(timeline.layer_size_sum().await); let state = timeline.current_state(); let remote_consistent_lsn_projected = timeline @@ -366,7 +360,11 @@ async fn build_timeline_info_common( last_record_lsn, prev_record_lsn: Some(timeline.get_prev_record_lsn()), latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(), - current_logical_size, + current_logical_size: current_logical_size.size_dont_care_about_accuracy(), + current_logical_size_is_accurate: match current_logical_size.accuracy() { + tenant::timeline::logical_size::Accuracy::Approximate => false, + tenant::timeline::logical_size::Accuracy::Exact => true, + }, current_physical_size, current_logical_size_non_incremental: None, timeline_dir_layer_file_size_sum: None, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 371aceab1c9b..f178bf5766e1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2,7 +2,7 @@ pub mod delete; mod eviction_task; mod init; pub mod layer_manager; -mod logical_size; +pub(crate) mod logical_size; pub mod span; pub mod uninit; mod walreceiver; @@ -851,23 +851,20 @@ impl Timeline { /// the initial size calculation has not been run (gets triggered on the first size access). /// /// return size and boolean flag that shows if the size is exact - pub fn get_current_logical_size( + pub(crate) fn get_current_logical_size( self: &Arc, ctx: &RequestContext, - ) -> anyhow::Result<(u64, bool)> { + ) -> logical_size::CurrentLogicalSize { let current_size = self.current_logical_size.current_size(); debug!("Current size: {current_size:?}"); - let mut is_exact = true; - let size = current_size.size(); if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) = (current_size, self.current_logical_size.initial_part_end) { - is_exact = false; self.try_spawn_size_init_task(initial_part_end, ctx); } - Ok((size, is_exact)) + current_size } /// Check if more than 'checkpoint_distance' of WAL has been accumulated in @@ -2037,10 +2034,10 @@ impl Timeline { // one value while current_logical_size is set to the // other. match logical_size.current_size() { - CurrentLogicalSize::Exact(new_current_size) => self + CurrentLogicalSize::Exact(ref new_current_size) => self .metrics .current_logical_size_gauge - .set(new_current_size), + .set(new_current_size.into()), CurrentLogicalSize::Approximate(_) => { // don't update the gauge yet, this allows us not to update the gauge back and // forth between the initial size calculation task. diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index ec9395525441..1f103051ef5c 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -59,21 +59,50 @@ pub(super) struct LogicalSize { /// Normalized current size, that the data in pageserver occupies. #[derive(Debug, Clone, Copy)] -pub(super) enum CurrentLogicalSize { +pub(crate) enum CurrentLogicalSize { /// The size is not yet calculated to the end, this is an intermediate result, /// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative, /// yet total logical size cannot be below 0. - Approximate(u64), + Approximate(Approximate), // Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are // available for observation without any calculations. - Exact(u64), + Exact(Exact), +} + +#[derive(Debug, Copy, Clone)] +pub(crate) enum Accuracy { + Approximate, + Exact, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct Approximate(u64); +#[derive(Debug, Clone, Copy)] +pub(crate) struct Exact(u64); + +impl From<&Approximate> for u64 { + fn from(value: &Approximate) -> Self { + value.0 + } +} + +impl From<&Exact> for u64 { + fn from(val: &Exact) -> Self { + val.0 + } } impl CurrentLogicalSize { - pub(super) fn size(&self) -> u64 { - *match self { - Self::Approximate(size) => size, - Self::Exact(size) => size, + pub(crate) fn size_dont_care_about_accuracy(&self) -> u64 { + match self { + Self::Approximate(size) => size.into(), + Self::Exact(size) => size.into(), + } + } + pub(crate) fn accuracy(&self) -> Accuracy { + match self { + Self::Approximate(_) => Accuracy::Approximate, + Self::Exact(_) => Accuracy::Exact, } } } @@ -109,16 +138,16 @@ impl LogicalSize { match self.initial_logical_size.get() { Some((initial_size, _)) => { crate::metrics::initial_logical_size::CALLS.exact.inc(); - CurrentLogicalSize::Exact(initial_size.checked_add_signed(size_increment) + CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment) .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}")) - .unwrap()) + .unwrap())) } None => { crate::metrics::initial_logical_size::CALLS .approximate .inc(); let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0); - CurrentLogicalSize::Approximate(non_negative_size_increment) + CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment)) } } } diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 3e56753ad495..167fd150f8fa 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -396,11 +396,12 @@ pub(super) async fn handle_walreceiver_connection( // Send the replication feedback message. // Regular standby_status_update fields are put into this message. - let (timeline_logical_size, _) = timeline + let current_timeline_size = timeline .get_current_logical_size(&ctx) - .context("Status update creation failed to get current logical size")?; + // FIXME: https://github.com/neondatabase/neon/issues/5963 + .size_dont_care_about_accuracy(); let status_update = PageserverFeedback { - current_timeline_size: timeline_logical_size, + current_timeline_size, last_received_lsn, disk_consistent_lsn, remote_consistent_lsn,