From 8e2e9f0fed000c1204b84a8dc9702ba28046938b Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 29 Oct 2024 22:24:04 +0000 Subject: [PATCH] pageserver: generation-aware storage for TenantManifest (#9555) ## Problem When tenant manifest objects are written without a generation suffix, concurrently attached pageservers may stamp on each others writes of the manifest and cause undefined behavior. Closes: #9543 ## Summary of changes - Use download_generation_object helper when reading manifests, to search for the most recent generation - Use Tenant::generation as the generation suffix when writing manifests. --- pageserver/src/tenant.rs | 9 ++- .../src/tenant/remote_timeline_client.rs | 26 +++++--- .../tenant/remote_timeline_client/download.rs | 59 ++++++++++++++----- 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 6ac11b0ae130..90d9feeeb604 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1352,14 +1352,15 @@ impl Tenant { ) .await?; let (offloaded_add, tenant_manifest) = - match remote_timeline_client::do_download_tenant_manifest( + match remote_timeline_client::download_tenant_manifest( remote_storage, &self.tenant_shard_id, + self.generation, &cancel, ) .await { - Ok((tenant_manifest, _generation)) => ( + Ok((tenant_manifest, _generation, _manifest_mtime)) => ( format!("{} offloaded", tenant_manifest.offloaded_timelines.len()), tenant_manifest, ), @@ -3130,8 +3131,6 @@ impl Tenant { } let tenant_manifest = self.build_tenant_manifest(); - // TODO: generation support - let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION; for child_shard in child_shards { tracing::info!( "Uploading tenant manifest for child {}", @@ -3140,7 +3139,7 @@ impl Tenant { upload_tenant_manifest( &self.remote_storage, child_shard, - generation, + self.generation, &tenant_manifest, &self.cancel, ) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 19e762b9fae8..03ec18c8822e 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -190,6 +190,7 @@ use chrono::{NaiveDateTime, Utc}; pub(crate) use download::download_initdb_tar_zst; use pageserver_api::models::TimelineArchivalState; use pageserver_api::shard::{ShardIndex, TenantShardId}; +use regex::Regex; use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; use utils::backoff::{ @@ -199,7 +200,7 @@ use utils::pausable_failpoint; use std::collections::{HashMap, VecDeque}; use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, OnceLock}; use std::time::Duration; use remote_storage::{ @@ -245,7 +246,7 @@ use super::upload_queue::{NotInitialized, SetDeletedFlagProgress}; use super::Generation; pub(crate) use download::{ - do_download_tenant_manifest, download_index_part, is_temp_download_file, + download_index_part, download_tenant_manifest, is_temp_download_file, list_remote_tenant_shards, list_remote_timelines, }; pub(crate) use index::LayerFileMetadata; @@ -274,12 +275,6 @@ pub(crate) const BUFFER_SIZE: usize = 32 * 1024; /// which we warn and skip. const DELETION_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_secs(10); -/// Hardcode a generation for the tenant manifest for now so that we don't -/// need to deal with generation-less manifests in the future. -/// -/// TODO: add proper generation support to all the places that use this. -pub(crate) const TENANT_MANIFEST_GENERATION: Generation = Generation::new(1); - pub enum MaybeDeletedIndexPart { IndexPart(IndexPart), Deleted(IndexPart), @@ -2239,6 +2234,12 @@ pub fn remote_tenant_manifest_path( RemotePath::from_string(&path).expect("Failed to construct path") } +/// Prefix to all generations' manifest objects in a tenant shard +pub fn remote_tenant_manifest_prefix(tenant_shard_id: &TenantShardId) -> RemotePath { + let path = format!("tenants/{tenant_shard_id}/tenant-manifest",); + RemotePath::from_string(&path).expect("Failed to construct path") +} + pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath { let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}"); RemotePath::from_string(&path).expect("Failed to construct path") @@ -2333,6 +2334,15 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option { } } +/// Given the key of a tenant manifest, parse out the generation number +pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option { + static RE: OnceLock = OnceLock::new(); + let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap()); + re.captures(path.get_path().as_str()) + .and_then(|c| c.get(1)) + .and_then(|m| Generation::parse_suffix(m.as_str())) +} + #[cfg(test)] mod tests { use super::*; diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 8679c68a2713..efcd20d1bf5c 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -20,7 +20,9 @@ use utils::backoff; use crate::config::PageServerConf; use crate::context::RequestContext; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::span::{ + debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id, +}; use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path}; use crate::tenant::storage_layer::LayerName; use crate::tenant::Generation; @@ -36,9 +38,10 @@ use utils::pausable_failpoint; use super::index::{IndexPart, LayerFileMetadata}; use super::manifest::TenantManifest; use super::{ - parse_remote_index_path, remote_index_path, remote_initdb_archive_path, - remote_initdb_preserved_archive_path, remote_tenant_manifest_path, remote_tenant_path, - FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, + parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path, + remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path, + remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD, + FAILED_REMOTE_OP_RETRIES, INITDB_PATH, }; /// @@ -365,32 +368,34 @@ async fn do_download_remote_path_retry_forever( .await } -pub async fn do_download_tenant_manifest( +async fn do_download_tenant_manifest( storage: &GenericRemoteStorage, tenant_shard_id: &TenantShardId, + _timeline_id: Option<&TimelineId>, + generation: Generation, cancel: &CancellationToken, -) -> Result<(TenantManifest, Generation), DownloadError> { - // TODO: generation support - let generation = super::TENANT_MANIFEST_GENERATION; +) -> Result<(TenantManifest, Generation, SystemTime), DownloadError> { let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation); - let (manifest_bytes, _manifest_bytes_mtime) = + let (manifest_bytes, manifest_bytes_mtime) = do_download_remote_path_retry_forever(storage, &remote_path, cancel).await?; let tenant_manifest = TenantManifest::from_json_bytes(&manifest_bytes) .with_context(|| format!("deserialize tenant manifest file at {remote_path:?}")) .map_err(DownloadError::Other)?; - Ok((tenant_manifest, generation)) + Ok((tenant_manifest, generation, manifest_bytes_mtime)) } async fn do_download_index_part( storage: &GenericRemoteStorage, tenant_shard_id: &TenantShardId, - timeline_id: &TimelineId, + timeline_id: Option<&TimelineId>, index_generation: Generation, cancel: &CancellationToken, ) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + let timeline_id = + timeline_id.expect("A timeline ID is always provided when downloading an index"); let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation); let (index_part_bytes, index_part_mtime) = @@ -426,7 +431,7 @@ async fn do_download_index_part( pub(crate) async fn download_generation_object<'a, T, DF, DFF, PF>( storage: &'a GenericRemoteStorage, tenant_shard_id: &'a TenantShardId, - timeline_id: &'a TimelineId, + timeline_id: Option<&'a TimelineId>, my_generation: Generation, what: &str, prefix: RemotePath, @@ -438,7 +443,7 @@ where DF: Fn( &'a GenericRemoteStorage, &'a TenantShardId, - &'a TimelineId, + Option<&'a TimelineId>, Generation, &'a CancellationToken, ) -> DFF, @@ -446,7 +451,7 @@ where PF: Fn(RemotePath) -> Option, T: 'static, { - debug_assert_current_span_has_tenant_and_timeline_id(); + debug_assert_current_span_has_tenant_id(); if my_generation.is_none() { // Operating without generations: just fetch the generation-less path @@ -552,11 +557,13 @@ pub(crate) async fn download_index_part( my_generation: Generation, cancel: &CancellationToken, ) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + debug_assert_current_span_has_tenant_and_timeline_id(); + let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); download_generation_object( storage, tenant_shard_id, - timeline_id, + Some(timeline_id), my_generation, "index_part", index_prefix, @@ -567,6 +574,28 @@ pub(crate) async fn download_index_part( .await } +pub(crate) async fn download_tenant_manifest( + storage: &GenericRemoteStorage, + tenant_shard_id: &TenantShardId, + my_generation: Generation, + cancel: &CancellationToken, +) -> Result<(TenantManifest, Generation, SystemTime), DownloadError> { + let manifest_prefix = remote_tenant_manifest_prefix(tenant_shard_id); + + download_generation_object( + storage, + tenant_shard_id, + None, + my_generation, + "tenant-manifest", + manifest_prefix, + do_download_tenant_manifest, + parse_remote_tenant_manifest_path, + cancel, + ) + .await +} + pub(crate) async fn download_initdb_tar_zst( conf: &'static PageServerConf, storage: &GenericRemoteStorage,