From d58e3f8b667e7a3d2efaa5043d8bdb04dbb2f08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 16 May 2024 22:25:19 +0200 Subject: [PATCH] Tiered compaction: improvements to the windows (#7787) Tiered compaction employs two sliding windows over the keyspace: `KeyspaceWindow` for the image layer generation and `Window` for the delta layer generation. Do some fixes to both windows: * The distinction between the two windows is not very clear. Do the absolute minimum to mention where they are used in the rustdoc description of the struct. Maybe we should rename them (say `WindowForImage` and `WindowForDelta`) or merge them into one window implementation. * Require the keys to strictly increase. The `accum_key_values` already combines the key, so there is no logic needed in `Window::feed` for the same key repeating. This is a follow-up to address the request in https://github.com/neondatabase/neon/pull/7671#pullrequestreview-2051995541 * In `choose_next_delta`, we claimed in the comment to use 1.25 as the factor but it was 1.66 instead. Fix this discrepancy by using `*5/4` as the two operations. --- pageserver/compaction/src/compact_tiered.rs | 24 +++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pageserver/compaction/src/compact_tiered.rs b/pageserver/compaction/src/compact_tiered.rs index 33c9948f45e5..20f88868f91d 100644 --- a/pageserver/compaction/src/compact_tiered.rs +++ b/pageserver/compaction/src/compact_tiered.rs @@ -663,8 +663,8 @@ where } } -// Sliding window through keyspace and values -// This is used by over_with_images to decide on good split points +/// Sliding window through keyspace and values for image layer +/// This is used by [`LevelCompactionState::cover_with_images`] to decide on good split points struct KeyspaceWindow { head: KeyspaceWindowHead, @@ -804,9 +804,9 @@ struct WindowElement { accum_size: u64, } -// Sliding window through keyspace and values -// -// This is used to decide what layer to write next, from the beginning of the window. +/// Sliding window through keyspace and values for delta layer tiling +/// +/// This is used to decide which delta layer to write next. struct Window { elems: VecDeque>, @@ -830,11 +830,13 @@ where fn feed(&mut self, key: K, size: u64) { let last_size; if let Some(last) = self.elems.back_mut() { - assert!(last.last_key <= key); - if key == last.last_key { - last.accum_size += size; - return; - } + // We require the keys to be strictly increasing for the window. + // Keys should already have been deduplicated by `accum_key_values` + assert!( + last.last_key < key, + "last_key(={}) >= key(={key})", + last.last_key + ); last_size = last.accum_size; } else { last_size = 0; @@ -922,7 +924,7 @@ where // If we're willing to stretch it up to 1.25 target size, could we // gobble up the rest of the work? This avoids creating very small // "tail" layers at the end of the keyspace - if !has_more && self.remain_size() < target_size * 5 / 3 { + if !has_more && self.remain_size() < target_size * 5 / 4 { self.commit_upto(self.elems.len()); } else { let delta_split_at = self.find_size_split(target_size);