From a16a91f1348fbac77bd5f4d730e4e5119936fefb Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Mon, 16 Dec 2024 13:35:45 -0500 Subject: [PATCH] faster slow path; check layer valid at the end of compaction Signed-off-by: Alex Chi Z --- pageserver/src/tenant/checks.rs | 64 +++++++++----------- pageserver/src/tenant/timeline/compaction.rs | 30 ++++++++- 2 files changed, 56 insertions(+), 38 deletions(-) diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs index 49e632ee7fa3..4f918484839a 100644 --- a/pageserver/src/tenant/checks.rs +++ b/pageserver/src/tenant/checks.rs @@ -30,53 +30,43 @@ use super::storage_layer::LayerName; /// If layer 2 and 4 contain the same single key, this is also a valid layer map. /// /// However, if a partial compaction is still going on, it is possible that we get a layer map not satisfying the above condition. -/// Therefore, we fallback to simply check if any of the two delta layers overlap. +/// Therefore, we fallback to simply check if any of the two delta layers overlap. (See "a slow path...") pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) let mut all_delta_layers = Vec::new(); for name in metadata { if let LayerName::Delta(layer) = name { - if layer.key_range.start.next() != layer.key_range.end { - all_delta_layers.push(layer.clone()); - } + all_delta_layers.push(layer.clone()); } } for layer in &all_delta_layers { - let lsn_range = &layer.lsn_range; - lsn_split_point.insert(lsn_range.start); - lsn_split_point.insert(lsn_range.end); - } - for layer in &all_delta_layers { - let lsn_range = layer.lsn_range.clone(); - let intersects = lsn_split_point.range(lsn_range).collect_vec(); - if intersects.len() > 1 { - return check_valid_layermap_slowpath(metadata); - } - } - None -} - -/// A slow path check for the layer map. Do a one-by-one check for all delta layers and see if any of them overlaps. -/// Overlapping delta layer would cause wrong results in both compaction and the normal read path. -fn check_valid_layermap_slowpath(metadata: &[LayerName]) -> Option { - let mut all_delta_layers = Vec::new(); - for name in metadata { - if let LayerName::Delta(layer) = name { - all_delta_layers.push(layer.clone()); + if layer.key_range.start.next() != layer.key_range.end { + let lsn_range = &layer.lsn_range; + lsn_split_point.insert(lsn_range.start); + lsn_split_point.insert(lsn_range.end); } } - for a in 0..all_delta_layers.len() { - for b in 0..a { - let layer_a = &all_delta_layers[a]; - let layer_b = &all_delta_layers[b]; - if overlaps_with(&layer_a.lsn_range, &layer_b.lsn_range) - && overlaps_with(&layer_a.key_range, &layer_b.key_range) - { - let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", - layer_a, layer_b - ); - return Some(err); + for (idx, layer) in all_delta_layers.iter().enumerate() { + if layer.key_range.start.next() != layer.key_range.end { + let lsn_range = layer.lsn_range.clone(); + let intersects = lsn_split_point.range(lsn_range).collect_vec(); + if intersects.len() > 1 { + // A slow path to check if the layer intersects with any other delta layer. + for (other_idx, other_layer) in all_delta_layers.iter().enumerate() { + if other_idx == idx { + // do not check self intersects with self + continue; + } + if overlaps_with(&layer.lsn_range, &other_layer.lsn_range) + && overlaps_with(&layer.key_range, &other_layer.key_range) + { + let err = format!( + "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", + layer, other_layer + ); + return Some(err); + } + } } } } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 9be73bae2080..2343fece9b64 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -2453,8 +2453,36 @@ impl Timeline { ); // Step 3: Place back to the layer map. + + // First, do a sanity check to ensure the newly-created layer map does not contain overlaps. + let all_layers = { + let guard = self.layers.read().await; + let layer_map = guard.layer_map()?; + layer_map.iter_historic_layers().collect_vec() + }; + + let mut final_layers = all_layers + .iter() + .map(|layer| layer.layer_name()) + .collect::>(); + for layer in &layer_selection { + final_layers.remove(&layer.layer_desc().layer_name()); + } + for layer in &compact_to { + final_layers.insert(layer.layer_desc().layer_name()); + } + let final_layers = final_layers.into_iter().collect_vec(); + + // TODO: move this check before we call `finish` on image layer writers. However, this will require us to get the layer name before we finish + // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are + // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. + if let Some(err) = check_valid_layermap(&final_layers) { + bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err); + } + + // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only + // operate on L1 layers. { - // TODO: sanity check if the layer map is valid (i.e., should not have overlaps) let mut guard = self.layers.write().await; guard .open_mut()?