Skip to content

Commit

Permalink
initial logical size: remove CALLS metric from hot path (#6018)
Browse files Browse the repository at this point in the history
Only introduced a few hours ago (#5995), I took a look at the numbers
from staging and realized that `get_current_logical_size()` is on the
walingest hot path: we call it for every `ReplicationMessage::XLogData`
that we receive.

Since the metric is global, it would be quite a busy cache line.

This PR replaces it with a new metric purpose-built for what's most
interesting right now.
  • Loading branch information
problame authored Dec 1, 2023
1 parent c1295bf commit e43cde7
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 23 deletions.
28 changes: 10 additions & 18 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -511,24 +511,16 @@ pub(crate) mod initial_logical_size {
}
}

pub(crate) struct Calls {
pub(crate) approximate: IntCounter,
pub(crate) exact: IntCounter,
}

pub(crate) static CALLS: Lazy<Calls> = Lazy::new(|| {
let vec = register_int_counter_vec!(
"pageserver_initial_logical_size_calls",
"Incremented each time some code asks for incremental logical size.\
The label records the accuracy of the result.",
&["accuracy"]
)
.unwrap();
Calls {
approximate: vec.with_label_values(&["approximate"]),
exact: vec.with_label_values(&["exact"]),
}
});
// context: https://github.com/neondatabase/neon/issues/5963
pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
Lazy::new(|| {
register_int_counter!(
"pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
"Counter for the following event: walreceiver calls\
Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
)
.unwrap()
});
}

pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
Expand Down
18 changes: 18 additions & 0 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,24 @@ impl Timeline {
self.try_spawn_size_init_task(initial_part_end, ctx);
}

if let CurrentLogicalSize::Approximate(_) = &current_size {
if ctx.task_kind() == TaskKind::WalReceiverConnectionHandler {
let first = self
.current_logical_size
.did_return_approximate_to_walreceiver
.compare_exchange(
false,
true,
AtomicOrdering::Relaxed,
AtomicOrdering::Relaxed,
)
.is_ok();
if first {
crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE.inc();
}
}
}

current_size
}

Expand Down
12 changes: 7 additions & 5 deletions pageserver/src/tenant/timeline/logical_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use once_cell::sync::OnceCell;
use tokio::sync::Semaphore;
use utils::lsn::Lsn;

use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
use std::sync::Arc;

/// Internal structure to hold all data needed for logical size calculation.
Expand Down Expand Up @@ -55,6 +55,9 @@ pub(super) struct LogicalSize {
/// see `current_logical_size_gauge`. Use the `update_current_logical_size`
/// to modify this, it will also keep the prometheus metric in sync.
pub size_added_after_initial: AtomicI64,

/// For [`crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE`].
pub(super) did_return_approximate_to_walreceiver: AtomicBool,
}

/// Normalized current size, that the data in pageserver occupies.
Expand Down Expand Up @@ -119,6 +122,7 @@ impl LogicalSize {
initial_size_computation: Arc::new(Semaphore::new(0)),
initial_part_end: None,
size_added_after_initial: AtomicI64::new(0),
did_return_approximate_to_walreceiver: AtomicBool::new(false),
}
}

Expand All @@ -128,6 +132,7 @@ impl LogicalSize {
initial_size_computation: Arc::new(Semaphore::new(1)),
initial_part_end: Some(compute_to),
size_added_after_initial: AtomicI64::new(0),
did_return_approximate_to_walreceiver: AtomicBool::new(false),
}
}

Expand All @@ -137,15 +142,12 @@ impl LogicalSize {
// we change the type.
match self.initial_logical_size.get() {
Some((initial_size, _)) => {
crate::metrics::initial_logical_size::CALLS.exact.inc();
CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment)
.with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
.unwrap()))
}
None => {
crate::metrics::initial_logical_size::CALLS
.approximate
.inc();

let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment))
}
Expand Down

1 comment on commit e43cde7

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2498 tests run: 2393 passed, 0 failed, 105 skipped (full report)


Flaky tests (2)

Postgres 16

  • test_ondemand_download_timetravel[local_fs]: debug

Postgres 14

Code coverage (full report)

  • functions: 54.6% (9292 of 17026 functions)
  • lines: 82.0% (53961 of 65775 lines)

The comment gets automatically updated with the latest test results
e43cde7 at 2023-12-01T22:22:53.285Z :recycle:

Please sign in to comment.