Skip to content

Commit

Permalink
pageserver: add supplementary branch usage stats
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsp committed Jul 3, 2024
1 parent dae55ba commit 3eb5155
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 0 deletions.
10 changes: 10 additions & 0 deletions libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,16 @@ pub struct TimelineInfo {
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
pub current_logical_size_non_incremental: Option<u64>,

/// How many bytes of WAL are within this branch's pitr_interval. If the pitr_interval goes
/// beyond the branch's branch point, we only count up to the branch point.
pub pitr_history_size: u64,

/// Whether this branch's branch point is within its ancestor's PITR interval (i.e. any
/// ancestor data used by this branch would have been retained anyway). If this is false, then
/// this branch may be imposing a cost on the ancestor by causing it to retain layers that it would
/// otherwise be able to GC.
pub within_ancestor_pitr: bool,

pub timeline_dir_layer_file_size_sum: Option<u64>,

pub wal_source_connstr: Option<String>,
Expand Down
4 changes: 4 additions & 0 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,8 @@ async fn build_timeline_info_common(

let walreceiver_status = timeline.walreceiver_status();

let (pitr_history_size, within_ancestor_pitr) = timeline.get_pitr_history_stats();

let info = TimelineInfo {
tenant_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
Expand All @@ -426,6 +428,8 @@ async fn build_timeline_info_common(
directory_entries_counts: timeline.get_directory_metrics().to_vec(),
current_physical_size,
current_logical_size_non_incremental: None,
pitr_history_size,
within_ancestor_pitr,
timeline_dir_layer_file_size_sum: None,
wal_source_connstr,
last_received_msg_lsn,
Expand Down
31 changes: 31 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,24 @@ static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});

/// Gauge vector registered under the name `pageserver_pitr_history_size`,
/// with one series per (`tenant_id`, `shard_id`, `timeline_id`) label triple.
///
/// Registration happens inside the `Lazy` initializer; a registration error
/// panics with "failed to define a metric".
static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
    let registration = register_uint_gauge_vec!(
        "pageserver_pitr_history_size",
        "Data written since PITR cutoff on this timeline",
        &["tenant_id", "shard_id", "timeline_id"]
    );
    registration.expect("failed to define a metric")
});

/// Gauge vector registered under the name `pageserver_archive_size`,
/// with one series per (`tenant_id`, `shard_id`, `timeline_id`) label triple.
///
/// Registration happens inside the `Lazy` initializer; a registration error
/// panics with "failed to define a metric".
static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
    let registration = register_uint_gauge_vec!(
        "pageserver_archive_size",
        "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
        &["tenant_id", "shard_id", "timeline_id"]
    );
    registration.expect("failed to define a metric")
});

static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
"pageserver_standby_horizon",
Expand Down Expand Up @@ -2102,6 +2120,8 @@ pub(crate) struct TimelineMetrics {
pub garbage_collect_histo: StorageTimeMetrics,
pub find_gc_cutoffs_histo: StorageTimeMetrics,
pub last_record_gauge: IntGauge,
pub pitr_history_size: UIntGauge,
pub archival_size: UIntGauge,
pub standby_horizon_gauge: IntGauge,
pub resident_physical_size_gauge: UIntGauge,
/// copy of LayeredTimeline.current_logical_size
Expand Down Expand Up @@ -2175,6 +2195,15 @@ impl TimelineMetrics {
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();

let pitr_history_size = PITR_HISTORY_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();

let archival_size = TIMELINE_ARCHIVE_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();

let standby_horizon_gauge = STANDBY_HORIZON
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
Expand Down Expand Up @@ -2227,6 +2256,8 @@ impl TimelineMetrics {
find_gc_cutoffs_histo,
load_layer_map_histo,
last_record_gauge,
pitr_history_size,
archival_size,
standby_horizon_gauge,
resident_physical_size_gauge,
current_logical_size_gauge,
Expand Down
26 changes: 26 additions & 0 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2868,6 +2868,7 @@ impl Tenant {
{
let mut target = timeline.gc_info.write().unwrap();

// Cull any expired leases
let now = SystemTime::now();
target.leases.retain(|_, lease| !lease.is_expired(&now));

Expand All @@ -2876,6 +2877,31 @@ impl Tenant {
.valid_lsn_lease_count_gauge
.set(target.leases.len() as u64);

// Look up parent's PITR cutoff to update the child's knowledge of whether it is within parent's PITR
if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) {
target.within_ancestor_pitr =
timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.pitr;
}
}

// Update metrics that depend on GC state
timeline
.metrics
.archival_size
.set(if target.within_ancestor_pitr {
timeline.metrics.current_logical_size_gauge.get()
} else {
0
});
timeline.metrics.pitr_history_size.set(
timeline
.get_last_record_lsn()
.checked_sub(target.cutoffs.pitr)
.unwrap_or(Lsn(0))
.0 as u64,
);

match gc_cutoffs.remove(&timeline.timeline_id) {
Some(cutoffs) => {
target.retain_lsns = branchpoints;
Expand Down
15 changes: 15 additions & 0 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,9 @@ pub(crate) struct GcInfo {

/// Leases granted to particular LSNs.
pub(crate) leases: BTreeMap<Lsn, LsnLease>,

/// Whether our branch point is within our ancestor's PITR interval (for cost estimation)
pub(crate) within_ancestor_pitr: bool,
}

impl GcInfo {
Expand Down Expand Up @@ -851,6 +854,18 @@ impl Timeline {
.map(|ancestor| ancestor.timeline_id)
}

/// Report PITR-related usage figures for this timeline.
///
/// Returns a pair `(history_bytes, within_ancestor_pitr)`:
/// * `history_bytes` is the last record LSN minus the PITR cutoff stored in
///   `gc_info`, or `0` when `checked_sub` yields `None`.
/// * `within_ancestor_pitr` is the flag currently stored in `gc_info`.
///
/// # Panics
///
/// Panics if acquiring the `gc_info` read guard fails (the result is unwrapped).
pub(crate) fn get_pitr_history_stats(&self) -> (u64, bool) {
    // Both returned values are derived while the same read guard is held.
    let guard = self.gc_info.read().unwrap();
    let last_record_lsn = self.get_last_record_lsn();
    let history_bytes = match last_record_lsn.checked_sub(guard.cutoffs.pitr) {
        Some(delta) => delta.0,
        None => Lsn(0).0,
    };
    (history_bytes, guard.within_ancestor_pitr)
}

/// Lock and get timeline's GC cutoff
pub(crate) fn get_latest_gc_cutoff_lsn(&self) -> RcuReadGuard<Lsn> {
self.latest_gc_cutoff_lsn.read()
Expand Down

0 comments on commit 3eb5155

Please sign in to comment.