From ecfa3d9de9eec824800db55f5e9592fe0502c96e Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 4 Sep 2024 05:39:56 +0800 Subject: [PATCH] fix(storage-scrubber): wrong trial condition (#8905) ref https://github.com/neondatabase/neon/issues/8872 ## Summary of changes We saw the storage scrubber get stuck in staging, apparently because of infinite retries. The backoff was computed as `1 << trial.max(5)`, which makes 5 a lower bound on the shift rather than a cap: the delay starts at 32 seconds and keeps doubling with every trial. I believe we should use `min` instead of `max` here, so that the backoff is capped at 32 seconds and we avoid getting minutes or hours of retry backoff. Signed-off-by: Alex Chi Z --- storage_scrubber/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 3c21d2f8cf13..3f08cddf50f9 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -422,7 +422,7 @@ fn stream_objects_with_retries<'a>( let yield_err = if err.is_permanent() { true } else { - let backoff_time = 1 << trial.max(5); + let backoff_time = 1 << trial.min(5); tokio::time::sleep(Duration::from_secs(backoff_time)).await; trial += 1; trial == MAX_RETRIES - 1 @@ -473,7 +473,7 @@ async fn list_objects_with_retries( s3_target.delimiter, DisplayErrorContext(e), ); - let backoff_time = 1 << trial.max(5); + let backoff_time = 1 << trial.min(5); tokio::time::sleep(Duration::from_secs(backoff_time)).await; } } @@ -492,7 +492,7 @@ async fn download_object_with_retries( Ok(response) => response, Err(e) => { error!("Failed to download object for key {key}: {e}"); - let backoff_time = 1 << trial.max(5); + let backoff_time = 1 << trial.min(5); tokio::time::sleep(Duration::from_secs(backoff_time)).await; continue; } @@ -508,7 +508,7 @@ async fn download_object_with_retries( } Err(e) => { error!("Failed to stream object body for key {key}: {e}"); - let backoff_time = 1 << trial.max(5); + let backoff_time = 1 << trial.min(5); tokio::time::sleep(Duration::from_secs(backoff_time)).await; } }