From fca44c25f14cf3d2a3955963f76fc6eb93139a70 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Wed, 2 Oct 2024 13:48:44 -0400 Subject: [PATCH] feat(pageserver): do space check before gc-compaction Signed-off-by: Alex Chi Z --- Cargo.lock | 11 ++++++ Cargo.toml | 1 + pageserver/Cargo.toml | 1 + pageserver/src/tenant/storage_layer/layer.rs | 4 +++ pageserver/src/tenant/timeline/compaction.rs | 35 ++++++++++++++++++++ 5 files changed, 52 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 528ee33193074..5aa4999d8c907 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2049,6 +2049,16 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "fsevent-sys" version = "4.1.0" @@ -3589,6 +3599,7 @@ dependencies = [ "enum-map", "enumset", "fail", + "fs2", "futures", "hex", "hex-literal", diff --git a/Cargo.toml b/Cargo.toml index 7997d34c33548..efb30eec41ae8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,6 +83,7 @@ enumset = "1.0.12" fail = "0.5.0" fallible-iterator = "0.2" framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" } +fs2 = "0.4" futures = "0.3" futures-core = "0.3" futures-util = "0.3" diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index f1fc3a86fe4b6..eab9d04664778 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -26,6 +26,7 @@ consumption_metrics.workspace = true crc32c.workspace = true either.workspace = true fail.workspace = true +fs2.workspace = true futures.workspace = true hex.workspace = true humantime.workspace = true diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 2923bd35588bb..f2eb88129007c 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ 
b/pageserver/src/tenant/storage_layer/layer.rs @@ -341,6 +341,10 @@ impl Layer { Ok(()) } + pub(crate) async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> { + self.0.needs_download().await + } + /// Assuming the layer is already downloaded, returns a guard which will prohibit eviction /// while the guard exists. /// diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 9f64471432e34..5b5b9494deac4 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1688,6 +1688,39 @@ impl Timeline { unreachable!("key retention is empty") } + /// Check how much space is left on the disk + async fn check_available_space(self: &Arc<Self>) -> anyhow::Result<u64> { + let base_path = self.conf.tenants_path(); + fs2::free_space(base_path).context("fs2::free_space") + } + + /// Check if the compaction can proceed safely without running out of space. We assume the size + /// upper bound of the produced files of a compaction job is the same as all layers involved in + /// the compaction. Therefore, we need `2 * layers_to_be_compacted_size` at least to do a + /// compaction. 
+ async fn check_compaction_space( + self: &Arc<Self>, + layer_selection: &[Layer], + ) -> anyhow::Result<()> { + let available_space = self.check_available_space().await?; + let mut remote_layer_size = 0; + let mut all_layer_size = 0; + for layer in layer_selection { + let needs_download = layer.needs_download().await?; + if needs_download.is_some() { + remote_layer_size += layer.layer_desc().file_size; + } + all_layer_size += layer.layer_desc().file_size; + } + let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */ + if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space + { + return Err(anyhow!("not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", + available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size)); + } + Ok(()) + } + /// An experimental compaction building block that combines compaction with garbage collection. /// /// The current implementation picks all delta + image layers that are below or intersecting with @@ -1803,6 +1836,8 @@ impl Timeline { lowest_retain_lsn ); + self.check_compaction_space(&layer_selection).await?; + // Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs. // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point. let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)