Skip to content

Commit

Permalink
feat(pageserver): do space check before gc-compaction (#9250)
Browse files Browse the repository at this point in the history
part of #9114

## Summary of changes

gc-compaction may take a lot of disk space, and if it does, the caller
should do a partial gc-compaction. This patch adds a space check for the
compaction job.

---------

Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh authored Oct 17, 2024
1 parent a7c0568 commit f3a3eef
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 10 deletions.
11 changes: 1 addition & 10 deletions pageserver/src/disk_usage_eviction_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1218,16 +1218,7 @@ mod filesystem_level_usage {
let stat = Statvfs::get(tenants_dir, mock_config)
.context("statvfs failed, presumably directory got unlinked")?;

// https://unix.stackexchange.com/a/703650
let blocksize = if stat.fragment_size() > 0 {
stat.fragment_size()
} else {
stat.block_size()
};

// use blocks_available (b_avail) since, pageserver runs as unprivileged user
let avail_bytes = stat.blocks_available() * blocksize;
let total_bytes = stat.blocks() * blocksize;
let (avail_bytes, total_bytes) = stat.get_avail_total_bytes();

Ok(Usage {
config,
Expand Down
16 changes: 16 additions & 0 deletions pageserver/src/statvfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,22 @@ impl Statvfs {
Statvfs::Mock(stat) => stat.block_size,
}
}

/// Returns `(available_bytes, total_bytes)` for the filesystem this stat result describes.
///
/// Both values are block counts multiplied by the same unit size: the fragment size when it
/// is non-zero, otherwise the block size.
pub fn get_avail_total_bytes(&self) -> (u64, u64) {
    // Which size the block counts are expressed in: https://unix.stackexchange.com/a/703650
    let frag = self.fragment_size();
    let unit = if frag == 0 { self.block_size() } else { frag };

    // `blocks_available` (b_avail) is used for the available figure because pageserver runs
    // as an unprivileged user.
    let avail = self.blocks_available() * unit;
    let total = self.blocks() * unit;

    (avail, total)
}
}

pub mod mock {
Expand Down
4 changes: 4 additions & 0 deletions pageserver/src/tenant/storage_layer/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,10 @@ impl Layer {
Ok(())
}

/// Crate-visible wrapper that forwards to `needs_download` on the wrapped inner value
/// (`self.0`) and returns its result unchanged.
///
/// NOTE(review): presumably `Ok(Some(_))` carries the reason the layer file must be fetched
/// and `Ok(None)` means it is already usable locally — confirm against the inner
/// implementation, which is not visible here.
///
/// # Errors
///
/// Propagates any `std::io::Error` returned by the inner call.
pub(crate) async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
self.0.needs_download().await
}

/// Assuming the layer is already downloaded, returns a guard which will prohibit eviction
/// while the guard exists.
///
Expand Down
42 changes: 42 additions & 0 deletions pageserver/src/tenant/timeline/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use utils::id::TimelineId;

use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
use crate::page_cache;
use crate::statvfs::Statvfs;
use crate::tenant::checks::check_valid_layermap;
use crate::tenant::remote_timeline_client::WaitCompletionError;
use crate::tenant::storage_layer::filter_iterator::FilterIterator;
Expand Down Expand Up @@ -1691,6 +1692,45 @@ impl Timeline {
unreachable!("key retention is empty")
}

/// Returns the number of bytes currently available on the filesystem that holds the
/// tenants directory.
///
/// # Errors
///
/// Fails if the `statvfs` lookup on the tenants directory fails.
async fn check_available_space(self: &Arc<Self>) -> anyhow::Result<u64> {
    let root = self.conf.tenants_path();

    // Only the "available" half of the (available, total) pair is of interest here.
    Statvfs::get(&root, None)
        .context("statvfs failed, presumably directory got unlinked")
        .map(|stat| stat.get_avail_total_bytes().0)
}

/// Check if the compaction can proceed safely without running out of space.
///
/// The estimate of the space a compaction job needs is the sum of:
///
/// * the total size of all selected layers, used as the upper bound for the size of the
///   newly generated files, and
/// * the size of every selected layer for which `needs_download()` reports `Some(_)`, i.e.
///   space for layers that still have to be downloaded.
///
/// In the worst case (every selected layer has to be downloaded) this amounts to
/// `2 * layers_to_be_compacted_size`. The estimate must fit into 80% of the currently
/// available disk space; the remaining 20% is reserved for other tasks.
///
/// # Errors
///
/// Returns an error if the available space cannot be determined, if querying a layer's
/// download state fails, or if the estimated required space exceeds the allocated space.
async fn check_compaction_space(
    self: &Arc<Self>,
    layer_selection: &[Layer],
) -> anyhow::Result<()> {
    let available_space = self.check_available_space().await?;

    // Saturating arithmetic: a wrapped sum would look tiny and let the check pass wrongly.
    let mut remote_layer_size = 0u64;
    let mut all_layer_size = 0u64;
    for layer in layer_selection {
        let needs_download = layer.needs_download().await?;
        let file_size = layer.layer_desc().file_size;
        if needs_download.is_some() {
            remote_layer_size = remote_layer_size.saturating_add(file_size);
        }
        all_layer_size = all_layer_size.saturating_add(file_size);
    }

    // Reserve 20% of the available space for other tasks.
    let allocated_space = (available_space as f64 * 0.8) as u64;
    // Space for the newly generated files plus space for downloading layers.
    let required_space = all_layer_size.saturating_add(remote_layer_size);

    if required_space > allocated_space {
        return Err(anyhow!("not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}",
            available_space, allocated_space, all_layer_size, remote_layer_size, required_space));
    }
    Ok(())
}

/// An experimental compaction building block that combines compaction with garbage collection.
///
/// The current implementation picks all delta + image layers that are below or intersecting with
Expand Down Expand Up @@ -1806,6 +1846,8 @@ impl Timeline {
lowest_retain_lsn
);

self.check_compaction_space(&layer_selection).await?;

// Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
Expand Down

1 comment on commit f3a3eef

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5290 tests run: 5073 passed, 0 failed, 217 skipped (full report)


Flaky tests (3)

Postgres 17

Postgres 15

Postgres 14

Code coverage* (full report)

  • functions: 31.3% (7565 of 24141 functions)
  • lines: 49.2% (60485 of 123027 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
f3a3eef at 2024-10-17T16:36:47.914Z :recycle:

Please sign in to comment.