Skip to content

Commit

Permalink
fix: noisy logging when download gets cancelled during shutdown (#8224)
Browse files Browse the repository at this point in the history
Before this PR, during timeline shutdown, we'd occasionally see
log lines like this one:

```
2024-06-26T18:28:11.063402Z  INFO initial_size_calculation{tenant_id=$TENANT,shard_id=0000 timeline_id=$TIMELINE}:logical_size_calculation_task:get_or_maybe_download{layer=000000000000000000000000000000000000-000000067F0001A3950001C1630100000000__0000000D88265898}: layer file download failed, and caller has been cancelled: Cancelled, shutting down
Stack backtrace:
   0: <core::result::Result<T,F> as core::ops::try_trait::FromResidual<core::result::Result<core::convert::Infallible,E>>>::from_residual
             at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/core/src/result.rs:1964:27
      pageserver::tenant::remote_timeline_client::RemoteTimelineClient::download_layer_file::{{closure}}
             at /home/nonroot/pageserver/src/tenant/remote_timeline_client.rs:531:13
      pageserver::tenant::storage_layer::layer::LayerInner::download_and_init::{{closure}}
             at /home/nonroot/pageserver/src/tenant/storage_layer/layer.rs:1136:14
      pageserver::tenant::storage_layer::layer::LayerInner::download_init_and_wait::{{closure}}::{{closure}}
             at /home/nonroot/pageserver/src/tenant/storage_layer/layer.rs:1082:74
```

We can eliminate the anyhow backtrace with no loss of information
because the conversion to anyhow::Error happens in exactly one place.

refs #7427
  • Loading branch information
problame authored and VladLazar committed Jul 8, 2024
1 parent ccd0b79 commit 3cf2da9
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ impl RemoteTimelineClient {
local_path: &Utf8Path,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> anyhow::Result<u64> {
) -> Result<u64, DownloadError> {
let downloaded_size = {
let _unfinished_gauge_guard = self.metrics.call_begin(
&RemoteOpFileKind::Layer,
Expand Down
17 changes: 4 additions & 13 deletions pageserver/src/tenant/storage_layer/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1096,19 +1096,10 @@ impl LayerInner {

match rx.await {
Ok(Ok(res)) => Ok(res),
Ok(Err(e)) => {
// sleep already happened in the spawned task, if it was not cancelled
match e.downcast_ref::<remote_storage::DownloadError>() {
// If the download failed due to its cancellation token,
// propagate the cancellation error upstream.
Some(remote_storage::DownloadError::Cancelled) => {
Err(DownloadError::DownloadCancelled)
}
// FIXME: this is not embedding the error because historically it would had
// been output to compute, however that is no longer the case.
_ => Err(DownloadError::DownloadFailed),
}
Ok(Err(remote_storage::DownloadError::Cancelled)) => {
Err(DownloadError::DownloadCancelled)
}
Ok(Err(_)) => Err(DownloadError::DownloadFailed),
Err(_gone) => Err(DownloadError::DownloadCancelled),
}
}
Expand All @@ -1118,7 +1109,7 @@ impl LayerInner {
timeline: Arc<Timeline>,
permit: heavier_once_cell::InitPermit,
ctx: &RequestContext,
) -> anyhow::Result<Arc<DownloadedLayer>> {
) -> Result<Arc<DownloadedLayer>, remote_storage::DownloadError> {
let result = timeline
.remote_client
.download_layer_file(
Expand Down

0 comments on commit 3cf2da9

Please sign in to comment.