Skip to content

Commit

Permalink
fix(pageserver): abort process if fsync fails (#9108)
Browse files Browse the repository at this point in the history
close #8140

The original issue is rather vague on what we should do. After
discussion w/ @problame we decided to narrow down the problems we want
to solve in that issue.

* read path -- do not panic for now.
* write path -- panic only on write errors (i.e., device error, fsync
error), but not on no-space for now.

The guideline is that if the pageserver behavior could lead to violation
of persistent constraints (i.e., return an operation as successful but
not actually persisting things), we should panic. Fsync is the place
where both of us agree that we should panic, because if fsync fails, the
kernel will mark dirty pages as clean, and the next fsync will not
necessarily return false. This would make the storage client assume the
operation is successful.

## Summary of changes

Make fsync panic on fatal errors.

---------

Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh authored Sep 27, 2024
1 parent cf6a776 commit cde1654
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 7 deletions.
6 changes: 5 additions & 1 deletion pageserver/src/tenant/mgr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,11 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<U
+ TEMP_FILE_SUFFIX;
let tmp_path = path_with_suffix_extension(&path, &rand_suffix);
fs::rename(path.as_ref(), &tmp_path).await?;
fs::File::open(parent).await?.sync_all().await?;
fs::File::open(parent)
.await?
.sync_all()
.await
.maybe_fatal_err("safe_rename_tenant_dir")?;
Ok(tmp_path)
}

Expand Down
3 changes: 3 additions & 0 deletions pageserver/src/tenant/remote_timeline_client/download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,15 @@ async fn download_object<'a>(
destination_file
.flush()
.await
.maybe_fatal_err("download_object sync_all")
.with_context(|| format!("flush source file at {dst_path}"))
.map_err(DownloadError::Other)?;

// not using sync_data because it can lose file size update
destination_file
.sync_all()
.await
.maybe_fatal_err("download_object sync_all")
.with_context(|| format!("failed to fsync source file at {dst_path}"))
.map_err(DownloadError::Other)?;

Expand Down Expand Up @@ -232,6 +234,7 @@ async fn download_object<'a>(
destination_file
.sync_all()
.await
.maybe_fatal_err("download_object sync_all")
.with_context(|| format!("failed to fsync source file at {dst_path}"))
.map_err(DownloadError::Other)?;

Expand Down
6 changes: 4 additions & 2 deletions pageserver/src/tenant/storage_layer/delta_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use crate::tenant::vectored_blob_io::{
};
use crate::tenant::PageReconstructError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
use crate::virtual_file::{self, VirtualFile};
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::{walrecord, TEMP_FILE_SUFFIX};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{anyhow, bail, ensure, Context, Result};
Expand Down Expand Up @@ -589,7 +589,9 @@ impl DeltaLayerWriterInner {
);

// fsync the file
file.sync_all().await?;
file.sync_all()
.await
.maybe_fatal_err("delta_layer sync_all")?;

trace!("created delta layer {}", self.path);

Expand Down
6 changes: 4 additions & 2 deletions pageserver/src/tenant/storage_layer/image_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use crate::tenant::vectored_blob_io::{
};
use crate::tenant::PageReconstructError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{self, VirtualFile};
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::{Bytes, BytesMut};
Expand Down Expand Up @@ -889,7 +889,9 @@ impl ImageLayerWriterInner {
// set inner.file here. The first read will have to re-open it.

// fsync the file
file.sync_all().await?;
file.sync_all()
.await
.maybe_fatal_err("image_layer sync_all")?;

trace!("created image layer {}", self.path);

Expand Down
5 changes: 3 additions & 2 deletions pageserver/src/virtual_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ impl VirtualFile {
&[]
};
utils::crashsafe::overwrite(&final_path, &tmp_path, content)
.maybe_fatal_err("crashsafe_overwrite")
})
.await
.expect("blocking task is never aborted")
Expand All @@ -475,15 +476,15 @@ impl VirtualFile {
pub async fn sync_all(&self) -> Result<(), Error> {
with_file!(self, StorageIoOperation::Fsync, |file_guard| {
let (_file_guard, res) = io_engine::get().sync_all(file_guard).await;
res
res.maybe_fatal_err("sync_all")
})
}

/// Call File::sync_data() on the underlying File.
pub async fn sync_data(&self) -> Result<(), Error> {
with_file!(self, StorageIoOperation::Fsync, |file_guard| {
let (_file_guard, res) = io_engine::get().sync_data(file_guard).await;
res
res.maybe_fatal_err("sync_data")
})
}

Expand Down

1 comment on commit cde1654

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5013 tests run: 4850 passed, 5 failed, 158 skipped (full report)


Failures on Postgres 17

Failures on Postgres 16

# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_subscriber_restart[release-pg16] or test_sharding_split_failures[debug-pg17-failure16] or test_background_operation_cancellation[release-pg17] or test_subscriber_restart[release-pg17] or test_subscriber_restart[debug-pg17]"
Flaky tests (19)

Postgres 17

  • test_ondemand_wal_download_in_replication_slot_funcs: debug-x86-64

Postgres 16

Postgres 15

Postgres 14

Test coverage report is not available

The comment gets automatically updated with the latest test results
cde1654 at 2024-09-27T19:43:53.210Z :recycle:

Please sign in to comment.