Skip to content

Commit

Permalink
pageserver: revise metrics lifetime for SecondaryTenant
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsp committed Nov 20, 2024
1 parent ada8440 commit 307795b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 27 deletions.
32 changes: 23 additions & 9 deletions pageserver/src/tenant/secondary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,6 @@ pub(crate) struct SecondaryTenant {
pub(super) heatmap_total_size_metric: UIntGauge,
}

impl Drop for SecondaryTenant {
fn drop(&mut self) {
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
}
}

impl SecondaryTenant {
pub(crate) fn new(
tenant_shard_id: TenantShardId,
Expand Down Expand Up @@ -167,6 +158,13 @@ impl SecondaryTenant {

// Wait for any secondary downloader work to complete
self.gate.close().await;

self.validate_metrics();

let tenant_id = self.tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
}

pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {
Expand Down Expand Up @@ -254,6 +252,22 @@ impl SecondaryTenant {
.await
.expect("secondary eviction should not have panicked");
}

/// Exhaustive check that incrementally updated metrics match the actual state.
#[cfg(feature = "testing")]
fn validate_metrics(&self) {
if cfg!(feature = "testing") {
let detail = self.detail.lock().unwrap();
let resident_size = detail.total_resident_size();

assert_eq!(resident_size, self.resident_size_metric.get());
}
}

#[cfg(not(feature = "testing"))]
fn validate_metrics(&self) {
// No-op in non-testing builds
}
}

/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
Expand Down
32 changes: 14 additions & 18 deletions pageserver/src/tenant/secondary/downloader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,19 @@ impl SecondaryDetail {
}
}

#[cfg(feature = "testing")]
pub(crate) fn total_resident_size(&self) -> u64 {
self.timelines
.values()
.map(|tl| {
tl.on_disk_layers
.values()
.map(|v| v.metadata.file_size)
.sum::<u64>()
})
.sum::<u64>()
}

pub(super) fn evict_layer(
&mut self,
name: LayerName,
Expand Down Expand Up @@ -763,24 +776,7 @@ impl<'a> TenantDownloader<'a> {
}

// Metrics consistency check in testing builds
if cfg!(feature = "testing") {
let detail = self.secondary_state.detail.lock().unwrap();
let resident_size = detail
.timelines
.values()
.map(|tl| {
tl.on_disk_layers
.values()
.map(|v| v.metadata.file_size)
.sum::<u64>()
})
.sum::<u64>();
assert_eq!(
resident_size,
self.secondary_state.resident_size_metric.get()
);
}

self.secondary_state.validate_metrics();
// Only update last_etag after a full successful download: this way will not skip
// the next download, even if the heatmap's actual etag is unchanged.
self.secondary_state.detail.lock().unwrap().last_download = Some(DownloadSummary {
Expand Down

0 comments on commit 307795b

Please sign in to comment.