Skip to content

Commit

Permalink
layer file creation: fsync timeline directories using `VirtualFile::s…
Browse files Browse the repository at this point in the history
…ync_all()` (#6986)

Except for the involvement of the VirtualFile fd cache, this is
equivalent to what happened before at runtime.

Future PR #6378 will implement
`VirtualFile::sync_all()` using
tokio-epoll-uring if that's configured as the io engine.
This PR is preliminary work for that.

part of #6663
  • Loading branch information
problame authored Mar 4, 2024
1 parent e1c032f commit 944cac9
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 118 deletions.
1 change: 0 additions & 1 deletion pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ pub(crate) mod ephemeral_file;
pub mod layer_map;

pub mod metadata;
mod par_fsync;
pub mod remote_timeline_client;
pub mod storage_layer;

Expand Down
84 changes: 0 additions & 84 deletions pageserver/src/tenant/par_fsync.rs

This file was deleted.

79 changes: 46 additions & 33 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
metadata::TimelineMetadata,
par_fsync,
};
use crate::{
context::{AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder},
Expand All @@ -76,7 +75,7 @@ use crate::{
use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
use crate::{
pgdatadir_mapping::{AuxFilesDirectory, DirectoryKind},
virtual_file::MaybeFatalIo,
virtual_file::{MaybeFatalIo, VirtualFile},
};

use crate::config::PageServerConf;
Expand Down Expand Up @@ -3417,28 +3416,31 @@ impl Timeline {
let frozen_layer = Arc::clone(frozen_layer);
let ctx = ctx.attached_child();
move || {
// Write it out
// Keep this inside `spawn_blocking` and `Handle::current`
// as long as the write path is still sync and the read impl
// is still not fully async. Otherwise executor threads would
// be blocked.
let _g = span.entered();
let new_delta =
Handle::current().block_on(frozen_layer.write_to_disk(&self_clone, &ctx))?;

// The write_to_disk() above calls writer.finish() which already did the fsync of the inodes.
// We just need to fsync the directory in which these inodes are linked,
// which we know to be the timeline directory.
//
// We use fatal_err() below because the after write_to_disk returns with success,
// the in-memory state of the filesystem already has the layer file in its final place,
// and subsequent pageserver code could think it's durable while it really isn't.
par_fsync::par_fsync(&[self_clone
.conf
.timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id)])
.fatal_err("fsync of timeline dir");

anyhow::Ok(new_delta)
Handle::current().block_on(
async move {
let new_delta = frozen_layer.write_to_disk(&self_clone, &ctx).await?;
// The write_to_disk() above calls writer.finish() which already did the fsync of the inodes.
// We just need to fsync the directory in which these inodes are linked,
// which we know to be the timeline directory.
//
// We use fatal_err() below because the after write_to_disk returns with success,
// the in-memory state of the filesystem already has the layer file in its final place,
// and subsequent pageserver code could think it's durable while it really isn't.
let timeline_dir =
VirtualFile::open(&self_clone.conf.timeline_path(
&self_clone.tenant_shard_id,
&self_clone.timeline_id,
))
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("VirtualFile::sync_all timeline dir");
anyhow::Ok(new_delta)
}
.instrument(span),
)
}
})
.await
Expand Down Expand Up @@ -3672,11 +3674,17 @@ impl Timeline {
// We use fatal_err() below because the after writer.finish() returns with success,
// the in-memory state of the filesystem already has the layer file in its final place,
// and subsequent pageserver code could think it's durable while it really isn't.
par_fsync::par_fsync_async(&[self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id)])
let timeline_dir = VirtualFile::open(
&self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id),
)
.await
.fatal_err("fsync of timeline dir");
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("VirtualFile::sync_all timeline dir");
}

let mut guard = self.layers.write().await;
Expand Down Expand Up @@ -4265,12 +4273,17 @@ impl Timeline {
// We use fatal_err() below because the after writer.finish() returns with success,
// the in-memory state of the filesystem already has the layer file in its final place,
// and subsequent pageserver code could think it's durable while it really isn't.
let timeline_dir = self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id);
par_fsync::par_fsync_async(&[timeline_dir])
let timeline_dir = VirtualFile::open(
&self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id),
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("fsync of timeline dir");
.fatal_err("VirtualFile::sync_all timeline dir");
}

stats.write_layer_files_micros = stats.read_lock_drop_micros.till_now();
Expand Down

1 comment on commit 944cac9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2561 tests run: 2428 passed, 0 failed, 133 skipped (full report)


Code coverage* (full report)

  • functions: 28.7% (6934 of 24172 functions)
  • lines: 47.2% (42517 of 90097 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
944cac9 at 2024-03-04T14:32:45.361Z :recycle:

Please sign in to comment.