From 52b02d95c801855dfd462f767bf551cbe2142663 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Mon, 29 Jul 2024 17:50:44 +0100
Subject: [PATCH] scrubber: enable cleaning up garbage tenants from known
 deletion bugs, add object age safety check (#8461)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

Old storage buckets can contain a lot of tenants that aren't known to the
control plane at all, because they belonged to test jobs that get their
control plane state cleaned up shortly after running.

In general, it's somewhat unsafe to purge these: it is hard to distinguish
"the control plane doesn't know about this, so it's garbage" from "the
control plane said it didn't know about this because of a bug in the
scrubber, the control plane, or the configured API URL".

However, the most common case is that we see only a small husk of a tenant
in S3, left behind by a specific old behavior of the software, for example:

- We had a bug where heatmaps weren't deleted on tenant deletion
- When WAL DR was first deployed, we didn't delete initdb.tar.zst on
  tenant deletion

## Summary of changes

- Add a `KnownBug` variant to the garbage reason
- Include such cases in the "safe" deletion mode (`--mode=deleted`)
- Add code that inspects tenants missing in the control plane to identify
  cases of known bugs (this is somewhat slow, but should go away once we've
  cleaned all these up)
- Add an additional `--min-age` safety check, similar to physical GC: even
  if everything indicates objects aren't needed, we won't delete something
  that has been modified too recently (see the example invocation below)
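For illustration, a run that combines known-bug purging with the new age
check might look like `storage_scrubber purge-garbage garbage.json
--mode=deleted --min-age 14d`. The binary name, input path, and duration
value here are hypothetical; `purge-garbage`, `--mode`, and `--min-age`
follow the clap definitions in `main.rs`, and `--min-age` is parsed by
`humantime`, so values such as `1d` or `2weeks` are accepted.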
---------

Co-authored-by: Yuchen Liang <70461588+yliang412@users.noreply.github.com>
Co-authored-by: Arpad Müller
---
 storage_scrubber/src/garbage.rs | 118 ++++++++++++++++++++++++++++++--
 storage_scrubber/src/main.rs    |  10 ++-
 2 files changed, 121 insertions(+), 7 deletions(-)

diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs
index 333269ec7e5e..78ecfc72329c 100644
--- a/storage_scrubber/src/garbage.rs
+++ b/storage_scrubber/src/garbage.rs
@@ -5,6 +5,7 @@
 use std::{
     collections::{HashMap, HashSet},
     sync::Arc,
+    time::Duration,
 };
 
 use anyhow::Context;
@@ -18,7 +19,7 @@ use utils::id::TenantId;
 
 use crate::{
     cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData},
-    init_remote, init_remote_generic,
+    init_remote, init_remote_generic, list_objects_with_retries,
     metadata_stream::{stream_tenant_timelines, stream_tenants},
     BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth,
 };
@@ -27,6 +28,11 @@
 enum GarbageReason {
     DeletedInConsole,
     MissingInConsole,
+
+    // The remaining data relates to a known deletion issue, and we're sure that purging this
+    // will not delete any real data, for example https://github.com/neondatabase/neon/pull/7928 where
+    // there is nothing in a tenant path apart from a heatmap file.
+    KnownBug,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
@@ -72,6 +78,15 @@
         }
     }
 
+    /// If an entity has been identified as requiring purge due to a known bug, e.g.
+    /// a particular type of object left behind after an incomplete deletion.
+    fn append_buggy(&mut self, entity: GarbageEntity) {
+        self.items.push(GarbageItem {
+            entity,
+            reason: GarbageReason::KnownBug,
+        });
+    }
+
     /// Return true if appended, false if not.  False means the result was not garbage.
     fn maybe_append<T>(&mut self, entity: GarbageEntity, result: Option<T>) -> bool
     where
@@ -219,6 +234,71 @@ async fn find_garbage_inner(
             assert!(project.tenant == tenant_shard_id.tenant_id);
         }
 
+        // Special case: If it's missing in console, check for known bugs that would enable
+        // us to conclusively identify it as purge-able anyway
+        if console_result.is_none() {
+            let timelines = stream_tenant_timelines(&s3_client, &target, tenant_shard_id)
+                .await?
+                .collect::<Vec<_>>()
+                .await;
+            if timelines.is_empty() {
+                // No timelines, but a heatmap: the deletion bug where we deleted everything but heatmaps
+                let tenant_objects = list_objects_with_retries(
+                    &s3_client,
+                    &target.tenant_root(&tenant_shard_id),
+                    None,
+                )
+                .await?;
+                let object = tenant_objects.contents.as_ref().unwrap().first().unwrap();
+                if object.key.as_ref().unwrap().ends_with("heatmap-v1.json") {
+                    tracing::info!("Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)");
+                    garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id));
+                    continue;
+                } else {
+                    tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key.as_ref().unwrap());
+                }
+            } else {
+                // A console-unknown tenant with timelines: check if these timelines only contain
+                // initdb.tar.zst, from the initial rollout of WAL DR in which we never deleted these.
+                let mut any_non_initdb = false;
+
+                for timeline_r in timelines {
+                    let timeline = timeline_r?;
+                    let timeline_objects = list_objects_with_retries(
+                        &s3_client,
+                        &target.timeline_root(&timeline),
+                        None,
+                    )
+                    .await?;
+                    if timeline_objects
+                        .common_prefixes
+                        .as_ref()
+                        .map(|v| v.len())
+                        .unwrap_or(0)
+                        > 0
+                    {
+                        // Sub-paths?  Unexpected
+                        any_non_initdb = true;
+                    } else {
+                        let object = timeline_objects.contents.as_ref().unwrap().first().unwrap();
+                        if object.key.as_ref().unwrap().ends_with("initdb.tar.zst") {
+                            tracing::info!("Timeline {timeline} contains only initdb.tar.zst");
+                        } else {
+                            any_non_initdb = true;
+                        }
+                    }
+                }
+
+                if any_non_initdb {
+                    tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb");
+                } else {
+                    tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb");
+                    garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id));
+                    continue;
+                }
+            }
+        }
+
         if garbage.maybe_append(GarbageEntity::Tenant(tenant_shard_id), console_result) {
             tracing::debug!("Tenant {tenant_shard_id} is garbage");
         } else {
@@ -349,9 +429,6 @@ pub async fn get_timeline_objects(
     tracing::debug!("Listing objects in timeline {ttid}");
     let timeline_root = super::remote_timeline_path_id(&ttid);
 
-    // TODO: apply extra validation based on object modification time.  Don't purge
-    // timelines whose index_part.json has been touched recently.
-
     let list = s3_client
         .list(
             Some(&timeline_root),
@@ -422,6 +499,7 @@ impl DeletionProgressTracker {
 pub async fn purge_garbage(
     input_path: String,
     mode: PurgeMode,
+    min_age: Duration,
     dry_run: bool,
 ) -> anyhow::Result<()> {
     let list_bytes = tokio::fs::read(&input_path).await?;
@@ -459,6 +537,7 @@
         .filter(|i| match (&mode, &i.reason) {
             (PurgeMode::DeletedAndMissing, _) => true,
             (PurgeMode::DeletedOnly, GarbageReason::DeletedInConsole) => true,
+            (PurgeMode::DeletedOnly, GarbageReason::KnownBug) => true,
             (PurgeMode::DeletedOnly, GarbageReason::MissingInConsole) => false,
         });
 
@@ -487,6 +566,37 @@
     let mut progress_tracker = DeletionProgressTracker::default();
     while let Some(result) = get_objects_results.next().await {
         let mut object_list = result?;
+
+        // Extra safety check: even if a collection of objects is garbage, check the max() of their
+        // modification times before purging, so that if we incorrectly marked a live tenant as
+        // garbage, we would notice that its index has been written recently and omit deleting it.
+        if object_list.is_empty() {
+            // Simplify subsequent code by ensuring the list always has at least one item.
+            // Usually this only occurs if parallel deletions are racing us, as there are no
+            // empty prefixes.
+            continue;
+        }
+        let max_mtime = object_list.iter().map(|o| o.last_modified).max().unwrap();
+        let age = max_mtime.elapsed();
+        match age {
+            Err(_) => {
+                tracing::warn!("Bad last_modified time");
+                continue;
+            }
+            Ok(a) if a < min_age => {
+                // Failed the age check.  This doesn't mean we did something wrong: a tenant might
+                // really be garbage and recently written, but out of an abundance of caution we
+                // still don't purge it.
+                tracing::info!(
+                    "Skipping tenant with young objects {}..{}",
+                    object_list.first().as_ref().unwrap().key,
+                    object_list.last().as_ref().unwrap().key
+                );
+                continue;
+            }
+            Ok(_) => {
+                // Passed the age check
+            }
+        }
+
         objects_to_delete.append(&mut object_list);
         if objects_to_delete.len() >= MAX_KEYS_PER_DELETE {
             do_delete(
diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs
index b3ed6f645177..346829b7c964 100644
--- a/storage_scrubber/src/main.rs
+++ b/storage_scrubber/src/main.rs
@@ -50,6 +50,8 @@ enum Command {
         input_path: String,
         #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
         mode: PurgeMode,
+        #[arg(long = "min-age")]
+        min_age: humantime::Duration,
     },
     #[command(verbatim_doc_comment)]
     ScanMetadata {
@@ -196,9 +198,11 @@ async fn main() -> anyhow::Result<()> {
             let console_config = ConsoleConfig::from_env()?;
             find_garbage(bucket_config, console_config, depth, node_kind, output_path).await
         }
-        Command::PurgeGarbage { input_path, mode } => {
-            purge_garbage(input_path, mode, !cli.delete).await
-        }
+        Command::PurgeGarbage {
+            input_path,
+            mode,
+            min_age,
+        } => purge_garbage(input_path, mode, min_age.into(), !cli.delete).await,
         Command::TenantSnapshot {
             tenant_id,
             output_path,
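As a standalone illustration of the age check added above, here is a minimal,
runnable sketch. `ListedObject` and `passes_age_check` are hypothetical names
for this example, not identifiers from the patch; the real logic operates
inline on the S3 listing results in `purge_garbage`.

```rust
use std::time::{Duration, SystemTime};

// Illustrative stand-in for an S3 listing entry; the patch reads
// `last_modified` from the real listing response.
struct ListedObject {
    key: String,
    last_modified: SystemTime,
}

/// True if the whole batch is older than `min_age`: the same rule the patch
/// applies before purging, using the *newest* object as the deciding mtime.
fn passes_age_check(objects: &[ListedObject], min_age: Duration) -> bool {
    let Some(max_mtime) = objects.iter().map(|o| o.last_modified).max() else {
        return false; // empty batches are skipped by the caller
    };
    match max_mtime.elapsed() {
        // A last_modified in the future means clock trouble; be conservative.
        Err(_) => false,
        Ok(age) => age >= min_age,
    }
}

fn main() {
    let day = Duration::from_secs(24 * 3600);
    let objects = vec![ListedObject {
        key: "tenants/<tenant_id>/heatmap-v1.json".to_string(),
        last_modified: SystemTime::now() - 40 * day,
    }];
    // A 40-day-old object passes a 14-day minimum age; purging may proceed.
    assert!(passes_age_check(&objects, 14 * day));
    println!("age check passed for {}", objects[0].key);
}
```

Keying the check on the newest object means a single recent write, such as a
fresh index upload, is enough to protect the whole batch from purging.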