Skip to content

Commit

Permalink
make compact_target_max_size_mb configurable
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh committed Dec 9, 2024
1 parent 3966318 commit 72b1bf1
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3020,7 +3020,7 @@ impl Tenant {
let jobs = timeline
.gc_compaction_split_jobs(GcCompactJob::from_compact_options(
next_scheduled_compaction_task.options,
))
), None)
.await
.map_err(CompactionError::Other)?;
if jobs.is_empty() {
Expand Down
13 changes: 9 additions & 4 deletions pageserver/src/tenant/timeline/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1805,12 +1805,19 @@ impl Timeline {
pub(crate) async fn gc_compaction_split_jobs(
self: &Arc<Self>,
job: GcCompactJob,
compact_target_max_size_mb: Option<u64>,
) -> anyhow::Result<Vec<GcCompactJob>> {
let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
job.compact_lsn_range.end
} else {
*self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
};

// Split the compaction job into sub-jobs of about 4GB each
const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024;
let compact_target_max_size_mb =
compact_target_max_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB);

let mut compact_jobs = Vec::new();
// For now, we simply use the key partitioning information; we should do a more fine-grained partitioning
// by estimating the amount of files read for a compaction job. We should also partition on LSN.
Expand Down Expand Up @@ -1857,8 +1864,6 @@ impl Timeline {
let guard = self.layers.read().await;
let layer_map = guard.layer_map()?;
let mut current_start = None;
// Split the compaction job into sub-jobs of about 4GB each
const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024; // 4GB, TODO: should be configuration in the future
let ranges_num = split_key_ranges.len();
for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() {
if current_start.is_none() {
Expand All @@ -1871,7 +1876,7 @@ impl Timeline {
}
let res = layer_map.range_search(start..end, compact_below_lsn);
let total_size = res.found.keys().map(|x| x.layer.file_size()).sum::<u64>();
if total_size > GC_COMPACT_MAX_SIZE_MB * 1024 * 1024 || ranges_num == idx + 1 {
if total_size > compact_target_max_size_mb * 1024 * 1024 || ranges_num == idx + 1 {
// Try to extend the compaction range so that we include at least one full layer file.
let extended_end = res
.found
Expand Down Expand Up @@ -1927,7 +1932,7 @@ impl Timeline {
let job = GcCompactJob::from_compact_options(options);
if sub_compaction {
info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs");
let jobs = self.gc_compaction_split_jobs(job).await?;
let jobs = self.gc_compaction_split_jobs(job, None).await?;
let jobs_len = jobs.len();
for (idx, job) in jobs.into_iter().enumerate() {
info!(
Expand Down
1 change: 1 addition & 0 deletions test_runner/regress/test_compaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
if i % 10 == 0:
log.info(f"Running churn round {i}/{churn_rounds} ...")

if (i - 1) % 10 == 0:
# Run gc-compaction every 10 rounds to ensure the test doesn't take too long.
ps_http.timeline_compact(
tenant_id,
Expand Down

0 comments on commit 72b1bf1

Please sign in to comment.