Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pageserver: add disk_compacted_lsn #10113

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md)
Neon pageserver LSNs:
* `last_record_lsn` - the end of last processed WAL record.
* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
* `disk_compacted_lsn` - data is known to be compacted to L1 on local disk up to this LSN.
* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
TODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context.
* `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)
Expand Down
10 changes: 8 additions & 2 deletions libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -983,12 +983,18 @@ pub struct TimelineInfo {
pub last_record_lsn: Lsn,
pub prev_record_lsn: Option<Lsn>,
pub latest_gc_cutoff_lsn: Lsn,

/// The LSN that has been flushed to local disk.
pub disk_consistent_lsn: Lsn,

/// The LSN that we have successfully uploaded to remote storage
/// The LSN that has been compacted down to L1 on local disk.
pub disk_compacted_lsn: Lsn,

/// The LSN that we have successfully uploaded to remote storage, according to
/// our generation.
pub remote_consistent_lsn: Lsn,

/// The LSN that we are advertising to safekeepers
/// The LSN that we are advertising to safekeepers, with verified generation.
pub remote_consistent_lsn_visible: Lsn,

/// The LSN from the start of the root timeline (never changes)
Expand Down
11 changes: 10 additions & 1 deletion libs/utils/src/lsn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use serde::{de::Visitor, Deserialize, Serialize};
use std::fmt;
use std::ops::{Add, AddAssign};
use std::ops::{Add, AddAssign, Sub};
use std::str::FromStr;
use std::sync::atomic::{AtomicU64, Ordering};

Expand Down Expand Up @@ -295,6 +295,15 @@ impl AddAssign<u64> for Lsn {
}
}

impl Sub<u64> for Lsn {
    type Output = Lsn;

    /// Subtracts `other` from the raw LSN value.
    ///
    /// # Panics
    ///
    /// Panics if `other` is larger than the LSN's value, i.e. if the
    /// subtraction would underflow.
    fn sub(self, other: u64) -> Self::Output {
        // Panic instead of wrapping around when the subtraction underflows.
        Lsn(self
            .0
            .checked_sub(other)
            .expect("Lsn subtraction underflowed"))
    }
}

/// An [`Lsn`] that can be accessed atomically.
pub struct AtomicLsn {
inner: AtomicU64,
Expand Down
3 changes: 3 additions & 0 deletions pageserver/src/http/openapi_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,9 @@ components:
disk_consistent_lsn:
type: string
format: hex
disk_compacted_lsn:
type: string
format: hex
remote_consistent_lsn:
type: string
format: hex
Expand Down
1 change: 1 addition & 0 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ async fn build_timeline_info_common(
timeline_id: timeline.timeline_id,
ancestor_timeline_id,
ancestor_lsn,
disk_compacted_lsn: timeline.get_disk_compacted_lsn().await.unwrap_or(Lsn(0)),
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
remote_consistent_lsn: remote_consistent_lsn_projected,
remote_consistent_lsn_visible,
Expand Down
16 changes: 16 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,15 @@ static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});

/// Lazily registered gauge vector exported as `pageserver_disk_compacted_lsn`,
/// with one series per (`tenant_id`, `shard_id`, `timeline_id`) label set.
static DISK_COMPACTED_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
    let registration = register_int_gauge_vec!(
        "pageserver_disk_compacted_lsn",
        "Disk compacted LSN grouped by timeline",
        &["tenant_id", "shard_id", "timeline_id"]
    );
    // Registration failing is treated as a programming error.
    registration.expect("failed to define a metric")
});

pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_projected_remote_consistent_lsn",
Expand Down Expand Up @@ -2596,6 +2605,7 @@ pub(crate) struct TimelineMetrics {
pub find_gc_cutoffs_histo: StorageTimeMetrics,
pub last_record_lsn_gauge: IntGauge,
pub disk_consistent_lsn_gauge: IntGauge,
pub disk_compacted_lsn_gauge: IntGauge,
pub pitr_history_size: UIntGauge,
pub archival_size: UIntGauge,
pub(crate) layer_size_image: UIntGauge,
Expand Down Expand Up @@ -2685,6 +2695,10 @@ impl TimelineMetrics {
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();

let disk_compacted_lsn_gauge = DISK_COMPACTED_LSN
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();

let pitr_history_size = PITR_HISTORY_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
Expand Down Expand Up @@ -2790,6 +2804,7 @@ impl TimelineMetrics {
load_layer_map_histo,
last_record_lsn_gauge,
disk_consistent_lsn_gauge,
disk_compacted_lsn_gauge,
pitr_history_size,
archival_size,
layer_size_image,
Expand Down Expand Up @@ -2855,6 +2870,7 @@ impl TimelineMetrics {
let shard_id = &self.shard_id;
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = DISK_COMPACTED_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
{
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/storage_layer/inmemory_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub struct InMemoryLayer {

/// This layer contains all the changes from 'start_lsn'. The
/// start is inclusive.
start_lsn: Lsn,
pub(crate) start_lsn: Lsn,

/// Frozen layers have an exclusive end LSN.
/// Writes are only allowed when this is `None`.
Expand Down
33 changes: 33 additions & 0 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,27 @@ impl Timeline {
self.remote_client.remote_consistent_lsn_visible()
}

/// Computes the latest LSN that has been compacted down into L1 on disk.
///
/// Returns `None` when the layer map cannot be obtained (`layer_map()` returns an
/// error; per the original author this is the shutdown case). LSNs above the
/// returned value are likely in L0 delta layers. Note that these may not yet be
/// uploaded to S3.
///
/// TODO: consider tracking this explicitly instead of recomputing every time.
pub(crate) async fn get_disk_compacted_lsn(&self) -> Option<Lsn> {
    // Keep the read guard alive for as long as we inspect the layer map.
    let guard = self.layers.read().await;
    let layermap = guard.layer_map().ok()?;

    // Start LSNs of everything that has not been compacted yet: frozen in-memory
    // layers and L0 delta layers.
    // TODO: check that this is correct.
    let frozen_starts = layermap.frozen_layers.iter().map(|frozen| frozen.start_lsn);
    let l0_starts = layermap
        .level0_deltas()
        .iter()
        .map(|delta| delta.lsn_range.start);
    let lowest_uncompacted = frozen_starts.chain(l0_starts).min();

    // The compacted LSN sits just below the lowest uncompacted start LSN.
    let just_below = lowest_uncompacted.map(|lsn| lsn.saturating_sub(Lsn(1)));

    // With no frozen or L0 layers at all, fall back to the disk consistent LSN.
    let flushed = self.disk_consistent_lsn.load();
    Some(just_below.unwrap_or(flushed))
}

/// The sum of the file size of all historic layers in the layer map.
/// This method makes no distinction between local and remote layers.
/// Hence, the result **does not represent local filesystem usage**.
Expand Down Expand Up @@ -2692,6 +2713,14 @@ impl Timeline {
num_layers, disk_consistent_lsn, total_physical_size
);

// TODO: consider finding a better place for this.
self.metrics.disk_compacted_lsn_gauge.set(
self.get_disk_compacted_lsn()
.await
.expect("layermanager must be open during init")
.0 as i64,
);

timer.stop_and_record();
Ok(())
}
Expand Down Expand Up @@ -4823,6 +4852,10 @@ impl Timeline {

drop_wlock(guard);

if let Some(lsn) = self.get_disk_compacted_lsn().await {
self.metrics.disk_compacted_lsn_gauge.set(lsn.0 as i64);
}

Ok(())
}

Expand Down
Loading