-
Notifications
You must be signed in to change notification settings - Fork 463
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test(pageserver): quantify compaction outcome (#7867)
A simple API to collect some statistics after compaction to easily understand the result. The tool reads the layer map, and analyze range by range instead of doing single-key operations, which is more efficient than doing a benchmark to collect the result. It currently computes two key metrics: * Latest data access efficiency, which finds how many delta layers / image layers the system needs to iterate before returning any key in a key range. * (Approximate) PiTR efficiency, as in #7770, which is simply the number of delta files in the range. The reason behind that is, assume no image layer is created, PiTR efficiency is simply the cost of collect records from the delta layers, and the replay time. Number of delta files (or in the future, estimated size of reads) is a simple yet efficient way of estimating how much effort the page server needs to reconstruct a page. Signed-off-by: Alex Chi Z <[email protected]>
- Loading branch information
Showing
6 changed files
with
151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
pub(crate) mod analysis; | ||
mod compaction; | ||
pub mod delete; | ||
pub(crate) mod detach_ancestor; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
use std::{collections::BTreeSet, ops::Range}; | ||
|
||
use utils::lsn::Lsn; | ||
|
||
use super::Timeline; | ||
|
||
/// Per-key-range statistics describing how much work reads in that range need:
/// whether an image layer covers it and how many delta layers sit on top.
/// Produced by [`Timeline::perf_info`]; serialized via serde for the API response.
#[derive(serde::Serialize)]
pub(crate) struct RangeAnalysis {
    // Inclusive start of the key range, stringified (see `Key::to_string` use in `perf_info`).
    start: String,
    // Exclusive end of the key range, stringified.
    end: String,
    // True if some image layer at or below the last-record LSN covers this range.
    has_image: bool,
    // Delta layers that must be visited on top of the newest covering image
    // before any key in this range can be reconstructed ("latest data access" cost).
    num_of_deltas_above_image: usize,
    // All delta layers overlapping this key range, regardless of LSN —
    // an approximation of PiTR replay cost.
    total_num_of_deltas: usize,
}
|
||
impl Timeline { | ||
pub(crate) async fn perf_info(&self) -> Vec<RangeAnalysis> { | ||
// First, collect all split points of the layers. | ||
let mut split_points = BTreeSet::new(); | ||
let mut delta_ranges = Vec::new(); | ||
let mut image_ranges = Vec::new(); | ||
|
||
let all_layer_files = { | ||
let guard = self.layers.read().await; | ||
guard.all_persistent_layers() | ||
}; | ||
let lsn = self.get_last_record_lsn(); | ||
|
||
for key in all_layer_files { | ||
split_points.insert(key.key_range.start); | ||
split_points.insert(key.key_range.end); | ||
if key.is_delta { | ||
delta_ranges.push((key.key_range.clone(), key.lsn_range.clone())); | ||
} else { | ||
image_ranges.push((key.key_range.clone(), key.lsn_range.start)); | ||
} | ||
} | ||
|
||
// For each split range, compute the estimated read amplification. | ||
let split_points = split_points.into_iter().collect::<Vec<_>>(); | ||
|
||
let mut result = Vec::new(); | ||
|
||
for i in 0..(split_points.len() - 1) { | ||
let start = split_points[i]; | ||
let end = split_points[i + 1]; | ||
// Find the latest image layer that contains the information. | ||
let mut maybe_image_layers = image_ranges | ||
.iter() | ||
// We insert split points for all image layers, and therefore a `contains` check for the start point should be enough. | ||
.filter(|(key_range, img_lsn)| key_range.contains(&start) && img_lsn <= &lsn) | ||
.cloned() | ||
.collect::<Vec<_>>(); | ||
maybe_image_layers.sort_by(|a, b| a.1.cmp(&b.1)); | ||
let image_layer = maybe_image_layers.last().cloned(); | ||
let lsn_filter_start = image_layer | ||
.as_ref() | ||
.map(|(_, lsn)| *lsn) | ||
.unwrap_or(Lsn::INVALID); | ||
|
||
fn overlaps_with(lsn_range_a: &Range<Lsn>, lsn_range_b: &Range<Lsn>) -> bool { | ||
!(lsn_range_a.end <= lsn_range_b.start || lsn_range_a.start >= lsn_range_b.end) | ||
} | ||
|
||
let maybe_delta_layers = delta_ranges | ||
.iter() | ||
.filter(|(key_range, lsn_range)| { | ||
key_range.contains(&start) && overlaps_with(&(lsn_filter_start..lsn), lsn_range) | ||
}) | ||
.cloned() | ||
.collect::<Vec<_>>(); | ||
|
||
let pitr_delta_layers = delta_ranges | ||
.iter() | ||
.filter(|(key_range, _)| key_range.contains(&start)) | ||
.cloned() | ||
.collect::<Vec<_>>(); | ||
|
||
result.push(RangeAnalysis { | ||
start: start.to_string(), | ||
end: end.to_string(), | ||
has_image: image_layer.is_some(), | ||
num_of_deltas_above_image: maybe_delta_layers.len(), | ||
total_num_of_deltas: pitr_delta_layers.len(), | ||
}); | ||
} | ||
|
||
result | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3e63d0f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
3280 tests run: 3128 passed, 0 failed, 152 skipped (full report)
Code coverage* (full report)
functions
:31.5% (6603 of 20963 functions)
lines
:48.5% (51070 of 105401 lines)
* collected from Rust tests only
3e63d0f at 2024-06-10T10:06:51.464Z :recycle: