Skip to content

Commit

Permalink
Make DiskBtreeReader::dump async (#4838)
Browse files Browse the repository at this point in the history
## Problem

`DiskBtreeReader::dump` calls `read_blk` internally, which we want to
make async in the future. As it is currently relying on recursion, and
async doesn't like recursion, we want to find an alternative to that and
instead traverse the tree using a loop and a manual stack.

## Summary of changes

* Make `DiskBtreeReader::dump` and all the places calling it async
* Make `DiskBtreeReader::dump` non-recursive internally and use a stack
instead. It now deparses the node in each iteration, which isn't
optimal, but on the other hand it's hard to store the node as it is
referencing the buffer. Self referential data are hard in Rust. For a
dumping function, speed isn't a priority so we deparse the node multiple
times now (up to branching factor many times).

Part of #4743

I have verified that output is unchanged by comparing the output of this
command both before and after this patch:
```
cargo test -p pageserver -- particular_data --nocapture 
```
  • Loading branch information
arpad-m authored Jul 31, 2023
1 parent 89ee8f2 commit e5183f8
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 37 deletions.
73 changes: 38 additions & 35 deletions pageserver/src/tenant/disk_btree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,39 +390,42 @@ where
}

#[allow(dead_code)]
pub fn dump(&self) -> Result<()> {
self.dump_recurse(self.root_blk, &[], 0)
}

fn dump_recurse(&self, blknum: u32, path: &[u8], depth: usize) -> Result<()> {
let blk = self.reader.read_blk(self.start_blk + blknum)?;
let buf: &[u8] = blk.as_ref();

let node = OnDiskNode::<L>::deparse(buf)?;

print!("{:indent$}", "", indent = depth * 2);
println!(
"blk #{}: path {}: prefix {}, suffix_len {}",
blknum,
hex::encode(path),
hex::encode(node.prefix),
node.suffix_len
);
pub async fn dump(&self) -> Result<()> {
let mut stack = Vec::new();

stack.push((self.root_blk, String::new(), 0, 0, 0));

while let Some((blknum, path, depth, child_idx, key_off)) = stack.pop() {
let blk = self.reader.read_blk(self.start_blk + blknum)?;
let buf: &[u8] = blk.as_ref();
let node = OnDiskNode::<L>::deparse(buf)?;

if child_idx == 0 {
print!("{:indent$}", "", indent = depth * 2);
let path_prefix = stack
.iter()
.map(|(_blknum, path, ..)| path.as_str())
.collect::<String>();
println!(
"blk #{blknum}: path {path_prefix}{path}: prefix {}, suffix_len {}",
hex::encode(node.prefix),
node.suffix_len
);
}

let mut idx = 0;
let mut key_off = 0;
while idx < node.num_children {
if child_idx + 1 < node.num_children {
let key_off = key_off + node.suffix_len as usize;
stack.push((blknum, path.clone(), depth, child_idx + 1, key_off));
}
let key = &node.keys[key_off..key_off + node.suffix_len as usize];
let val = node.value(idx as usize);
let val = node.value(child_idx as usize);

print!("{:indent$}", "", indent = depth * 2 + 2);
println!("{}: {}", hex::encode(key), hex::encode(val.0));

if node.level > 0 {
let child_path = [path, node.prefix].concat();
self.dump_recurse(val.to_blknum(), &child_path, depth + 1)?;
stack.push((val.to_blknum(), hex::encode(node.prefix), depth + 1, 0, 0));
}
idx += 1;
key_off += node.suffix_len as usize;
}
Ok(())
}
Expand Down Expand Up @@ -754,8 +757,8 @@ mod tests {
}
}

#[test]
fn basic() -> Result<()> {
#[tokio::test]
async fn basic() -> Result<()> {
let mut disk = TestDisk::new();
let mut writer = DiskBtreeBuilder::<_, 6>::new(&mut disk);

Expand All @@ -775,7 +778,7 @@ mod tests {

let reader = DiskBtreeReader::new(0, root_offset, disk);

reader.dump()?;
reader.dump().await?;

// Test the `get` function on all the keys.
for (key, val) in all_data.iter() {
Expand Down Expand Up @@ -835,8 +838,8 @@ mod tests {
Ok(())
}

#[test]
fn lots_of_keys() -> Result<()> {
#[tokio::test]
async fn lots_of_keys() -> Result<()> {
let mut disk = TestDisk::new();
let mut writer = DiskBtreeBuilder::<_, 8>::new(&mut disk);

Expand All @@ -856,7 +859,7 @@ mod tests {

let reader = DiskBtreeReader::new(0, root_offset, disk);

reader.dump()?;
reader.dump().await?;

use std::sync::Mutex;

Expand Down Expand Up @@ -994,8 +997,8 @@ mod tests {
///
/// This test contains a particular data set, see disk_btree_test_data.rs
///
#[test]
fn particular_data() -> Result<()> {
#[tokio::test]
async fn particular_data() -> Result<()> {
// Build a tree from it
let mut disk = TestDisk::new();
let mut writer = DiskBtreeBuilder::<_, 26>::new(&mut disk);
Expand All @@ -1022,7 +1025,7 @@ mod tests {
})?;
assert_eq!(count, disk_btree_test_data::TEST_DATA.len());

reader.dump()?;
reader.dump().await?;

Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/storage_layer/delta_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ impl Layer for DeltaLayer {
file,
);

tree_reader.dump()?;
tree_reader.dump().await?;

let mut cursor = file.block_cursor();

Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/storage_layer/image_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl Layer for ImageLayer {
let tree_reader =
DiskBtreeReader::<_, KEY_SIZE>::new(inner.index_start_blk, inner.index_root_blk, file);

tree_reader.dump()?;
tree_reader.dump().await?;

tree_reader.visit(&[0u8; KEY_SIZE], VisitDirection::Forwards, |key, value| {
println!("key: {} offset {}", hex::encode(key), value);
Expand Down

1 comment on commit e5183f8

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1308 tests run: 1249 passed, 2 failed, 57 skipped (full report)


Failures on Posgres 15

  • test_threshold_based_eviction: release

Failures on Posgres 14

  • test_branch_creation_heavy_write[20]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_branch_creation_heavy_write[20] or test_threshold_based_eviction[release-pg15]"

Please sign in to comment.