Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support tenant manifests in the scrubber #9942

Merged
merged 16 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2564,9 +2564,9 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
}

/// Given the key of a tenant manifest, parse out the generation number
pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
static RE: OnceLock<Regex> = OnceLock::new();
let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap());
let re = RE.get_or_init(|| Regex::new(r".*tenant-manifest-([0-9a-f]{8}).json").unwrap());
problame marked this conversation as resolved.
Show resolved Hide resolved
re.captures(path.get_path().as_str())
.and_then(|c| c.get(1))
.and_then(|m| Generation::parse_suffix(m.as_str()))
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/remote_timeline_client/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl TenantManifest {
offloaded_timelines: vec![],
}
}
pub(crate) fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
/// Deserializes a manifest from its JSON byte representation.
///
/// # Errors
///
/// Returns the underlying [`serde_json::Error`] when `bytes` cannot be
/// deserialized into `Self`.
pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
    // The target type is fixed by the declared return type, so no turbofish is needed.
    serde_json::from_slice(bytes)
}

Expand Down
135 changes: 134 additions & 1 deletion storage_scrubber/src/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@ use itertools::Itertools;
use pageserver::tenant::checks::check_valid_layermap;
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver::tenant::remote_timeline_client::manifest::TenantManifest;
use pageserver_api::shard::ShardIndex;
use tokio_util::sync::CancellationToken;
use tracing::{info, warn};
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::shard::TenantShardId;

use crate::cloud_admin_api::BranchData;
use crate::metadata_stream::stream_listing;
use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId};
use futures_util::StreamExt;
use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
use pageserver::tenant::remote_timeline_client::{
parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path,
};
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::IndexPart;
use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath};
Expand Down Expand Up @@ -527,3 +531,132 @@ async fn list_timeline_blobs_impl(
unknown_keys,
}))
}

/// Summary of the tenant manifests found in remote storage for one tenant shard,
/// as produced by [`list_tenant_manifests`] on success.
pub(crate) struct RemoteTenantManifestInfo {
/// Highest generation among `manifests`. [`list_tenant_manifests`] always
/// fills this with `Some` when it builds this struct.
pub(crate) latest_generation: Option<Generation>,
/// Every listed object whose key parsed as a tenant manifest, paired with
/// the generation parsed from that key.
pub(crate) manifests: Vec<(Generation, ListingObject)>,
}

/// Outcome of [`list_tenant_manifests`].
pub(crate) enum ListTenantManifestResult {
/// The listing did not end with a successfully parsed latest manifest.
///
/// Returned when no manifest was listed (in which case `errors` may be
/// empty), when unknown keys were listed, or when the highest-generation
/// manifest could not be downloaded or parsed.
WithErrors {
/// Pairs of (key or prefix the problem relates to, description of the problem).
errors: Vec<(String, String)>,
/// Listed objects that were not recognised as tenant manifests.
#[allow(dead_code)]
unknown_keys: Vec<ListingObject>,
},
/// Only tenant manifests were listed, and the one with the highest
/// generation was downloaded and parsed successfully.
NoErrors(RemoteTenantManifestInfo),
}

/// Lists the tenant manifests in remote storage and parses the latest one, returning a [`ListTenantManifestResult`] object.
pub(crate) async fn list_tenant_manifests(
remote_client: &GenericRemoteStorage,
tenant_id: TenantShardId,
root_target: &RootTarget,
) -> anyhow::Result<ListTenantManifestResult> {
let mut errors = Vec::new();
let mut unknown_keys = Vec::new();

let mut tenant_root_target = root_target.tenant_root(&tenant_id);
let original_prefix = tenant_root_target.prefix_in_bucket.clone();
const TENANT_MANIFEST_STEM: &str = "tenant-manifest";
tenant_root_target.prefix_in_bucket += TENANT_MANIFEST_STEM;
tenant_root_target.delimiter = String::new();

let mut manifests: Vec<(Generation, ListingObject)> = Vec::new();

let prefix_str = &original_prefix
.strip_prefix("/")
.unwrap_or(&original_prefix);

let mut stream = std::pin::pin!(stream_listing(remote_client, &tenant_root_target));
'outer: while let Some(obj) = stream.next().await {
let (key, Some(obj)) = obj? else {
panic!("ListingObject not specified");
};

'err: {
// TODO a let chain would be nicer here.
let Some(name) = key.object_name() else {
break 'err;
};
if !name.starts_with(TENANT_MANIFEST_STEM) {
break 'err;
}
let Some(generation) = parse_remote_tenant_manifest_path(key.clone()) else {
break 'err;
};
problame marked this conversation as resolved.
Show resolved Hide resolved
tracing::debug!("tenant manifest {key}");
manifests.push((generation, obj));
continue 'outer;
}
tracing::info!("Listed an unknown key: {key}");
unknown_keys.push(obj);
}

if manifests.is_empty() {
tracing::debug!("No manifest for timeline.");

return Ok(ListTenantManifestResult::WithErrors {
errors,
unknown_keys,
});
}
if !unknown_keys.is_empty() {
errors.push(((*prefix_str).to_owned(), "unknown keys listed".to_string()));

return Ok(ListTenantManifestResult::WithErrors {
errors,
unknown_keys,
});
}

// Find the manifest with the highest generation
let (latest_generation, latest_listing_object) = manifests
.iter()
.max_by_key(|i| i.0)
.map(|(g, obj)| (*g, obj.clone()))
.unwrap();
problame marked this conversation as resolved.
Show resolved Hide resolved

let manifest_bytes =
match download_object_with_retries(remote_client, &latest_listing_object.key).await {
Ok(bytes) => bytes,
Err(e) => {
// It is possible that the tenant gets deleted in-between we list the objects
// and we download the manifest file.
errors.push((
latest_listing_object.key.get_path().as_str().to_owned(),
format!("failed to download tenant-manifest.json: {e}"),
));
return Ok(ListTenantManifestResult::WithErrors {
errors,
unknown_keys,
});
}
};

match TenantManifest::from_json_bytes(&manifest_bytes) {
Ok(_manifest) => {
return Ok(ListTenantManifestResult::NoErrors(
RemoteTenantManifestInfo {
latest_generation: Some(latest_generation),
manifests,
},
));
}
Err(parse_error) => errors.push((
latest_listing_object.key.get_path().as_str().to_owned(),
format!("tenant-manifest.json body parsing error: {parse_error}"),
)),
}

if errors.is_empty() {
errors.push((
(*prefix_str).to_owned(),
"Unexpected: no errors did not lead to a successfully parsed blob return".to_string(),
));
}

Ok(ListTenantManifestResult::WithErrors {
errors,
unknown_keys,
})
}
Loading
Loading