remove init_order delay for init logical size calculations
The priority mechanism introduced earlier should take care of it.
problame committed Dec 1, 2023
1 parent 2847983 commit 5dec9f4
Showing 6 changed files with 13 additions and 112 deletions.
23 changes: 1 addition & 22 deletions pageserver/src/bin/pageserver.rs
@@ -402,15 +402,11 @@ fn start_pageserver(
let (init_remote_done_tx, init_remote_done_rx) = utils::completion::channel();
let (init_done_tx, init_done_rx) = utils::completion::channel();

let (init_logical_size_done_tx, init_logical_size_done_rx) = utils::completion::channel();

let (background_jobs_can_start, background_jobs_barrier) = utils::completion::channel();

let order = pageserver::InitializationOrder {
initial_tenant_load_remote: Some(init_done_tx),
initial_tenant_load: Some(init_remote_done_tx),
initial_logical_size_can_start: init_done_rx.clone(),
initial_logical_size_attempt: Some(init_logical_size_done_tx),
background_jobs_can_start: background_jobs_barrier.clone(),
};

@@ -464,7 +460,7 @@ fn start_pageserver(
});

let WaitForPhaseResult {
timeout_remaining: timeout,
timeout_remaining: _timeout,
skipped: init_load_skipped,
} = wait_for_phase("initial_tenant_load", init_load_done, timeout).await;

@@ -476,20 +472,6 @@ fn start_pageserver(
tracing::info!("Cancelled before initial logical sizes completed")
});

let logical_sizes_done = std::pin::pin!(async {
init_logical_size_done_rx.wait().await;
startup_checkpoint(
started_startup_at,
"initial_logical_sizes",
"Initial logical sizes completed",
);
});

let WaitForPhaseResult {
timeout_remaining: _,
skipped: logical_sizes_skipped,
} = wait_for_phase("initial_logical_sizes", logical_sizes_done, timeout).await;

scopeguard::ScopeGuard::into_inner(guard);

// allow background jobs to start: we either completed prior stages, or they reached timeout
@@ -514,9 +496,6 @@ fn start_pageserver(
if let Some(f) = init_load_skipped {
f.await;
}
if let Some(f) = logical_sizes_skipped {
f.await;
}
scopeguard::ScopeGuard::into_inner(guard);

startup_checkpoint(started_startup_at, "complete", "Startup complete");
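Note: the hunks above use the wait-for-phase pattern: each startup phase is awaited with a timeout, and when the timeout fires the still-pending future is handed back as "skipped" so background jobs can be released first and the phase can still be awaited (and checkpointed) afterwards. The sketch below is a minimal, hypothetical illustration of that pattern, assuming a tokio runtime and returning a plain tuple instead of the WaitForPhaseResult struct; the name wait_for_phase, its signature, and the logging are stand-ins, not the pageserver's actual implementation.

// Minimal sketch of waiting for a startup phase with a timeout.
// If the timeout expires, the pending future is returned so the
// caller can await it later, after background jobs were allowed to start.
use std::future::Future;
use std::pin::Pin;
use std::time::{Duration, Instant};

async fn wait_for_phase<'a, F>(
    phase: &str,
    mut fut: Pin<&'a mut F>,
    timeout: Duration,
) -> (Duration, Option<Pin<&'a mut F>>)
where
    F: Future<Output = ()>,
{
    let started = Instant::now();
    let result = tokio::time::timeout(timeout, &mut fut).await;
    match result {
        // Phase finished in time: report how much of the budget is left.
        Ok(()) => (timeout.saturating_sub(started.elapsed()), None),
        // Timed out: keep going, but hand the pending future back as "skipped".
        Err(_elapsed) => {
            eprintln!("timed out waiting for phase {phase}, continuing startup");
            (Duration::ZERO, Some(fut))
        }
    }
}

#[tokio::main]
async fn main() {
    let phase = std::pin::pin!(async {
        // stand-in for the "initial_tenant_load" work
        tokio::time::sleep(Duration::from_millis(50)).await;
    });

    let (_timeout_remaining, skipped) =
        wait_for_phase("initial_tenant_load", phase, Duration::from_millis(10)).await;

    // ... allow background jobs to start here ...

    if let Some(f) = skipped {
        // still record the startup checkpoint once the phase eventually finishes
        f.await;
    }
}
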
7 changes: 0 additions & 7 deletions pageserver/src/lib.rs
@@ -186,13 +186,6 @@ pub struct InitializationOrder {
/// Each initial tenant load task carries this until completion.
pub initial_tenant_load: Option<utils::completion::Completion>,

/// Barrier for when we can start initial logical size calculations.
pub initial_logical_size_can_start: utils::completion::Barrier,

/// Each timeline owns a clone of this to be consumed on the initial logical size calculation
/// attempt. It is important to drop this once the attempt has completed.
pub initial_logical_size_attempt: Option<utils::completion::Completion>,

/// Barrier for when we can start any background jobs.
///
/// This can be broken up later on, but right now there is just one class of a background job.
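Note: InitializationOrder is built from completion/barrier pairs created by utils::completion::channel(): every holder of a Completion keeps the paired Barrier closed, and dropping the last Completion releases all waiters, which is why the removed initial_logical_size_attempt had to be dropped once the attempt completed. The sketch below is a hypothetical stand-in built on tokio::sync::watch purely to illustrate that pattern; it is not the pageserver's utils::completion implementation.

// Sketch of a completion/barrier pair: cloneable Completion on one side,
// cloneable Barrier on the other; the barrier opens when the last
// Completion is dropped.
use std::sync::Arc;
use tokio::sync::watch;

#[derive(Clone)]
pub struct Completion {
    _tx: Arc<watch::Sender<()>>,
}

#[derive(Clone)]
pub struct Barrier {
    rx: watch::Receiver<()>,
}

impl Barrier {
    pub async fn wait(mut self) {
        // changed() stays pending until every Completion (sender) is dropped,
        // at which point it returns Err and the barrier opens.
        while self.rx.changed().await.is_ok() {}
    }
}

pub fn channel() -> (Completion, Barrier) {
    let (tx, rx) = watch::channel(());
    (Completion { _tx: Arc::new(tx) }, Barrier { rx })
}

#[tokio::main]
async fn main() {
    let (background_jobs_can_start, background_jobs_barrier) = channel();

    let waiter = tokio::spawn({
        let barrier = background_jobs_barrier.clone();
        async move {
            barrier.wait().await;
            println!("background jobs may start");
        }
    });

    // ... run the gated startup phases, cloning `background_jobs_can_start`
    // into each task that should delay background jobs ...

    drop(background_jobs_can_start); // last Completion gone: the barrier opens
    waiter.await.unwrap();
}
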
63 changes: 11 additions & 52 deletions pageserver/src/tenant.rs
@@ -469,7 +469,6 @@ impl Tenant {
index_part: Option<IndexPart>,
metadata: TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
init_order: Option<&InitializationOrder>,
_ctx: &RequestContext,
) -> anyhow::Result<()> {
let tenant_id = self.tenant_shard_id;
@@ -479,7 +478,6 @@
&metadata,
ancestor.clone(),
resources,
init_order,
CreateTimelineCause::Load,
)?;
let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
@@ -680,10 +678,6 @@ impl Tenant {
// as we are no longer loading, signal completion by dropping
// the completion while we resume deletion
drop(_completion);
// do not hold to initial_logical_size_attempt as it will prevent loading from proceeding without timeout
let _ = init_order
.as_mut()
.and_then(|x| x.initial_logical_size_attempt.take());
let background_jobs_can_start =
init_order.as_ref().map(|x| &x.background_jobs_can_start);
if let Some(background) = background_jobs_can_start {
@@ -697,7 +691,6 @@
&tenant_clone,
preload,
tenants,
init_order,
&ctx,
)
.await
@@ -710,7 +703,7 @@
}
}

match tenant_clone.attach(init_order, preload, &ctx).await {
match tenant_clone.attach(preload, &ctx).await {
Ok(()) => {
info!("attach finished, activating");
tenant_clone.activate(broker_client, None, &ctx);
@@ -773,7 +766,6 @@
///
async fn attach(
self: &Arc<Tenant>,
init_order: Option<InitializationOrder>,
preload: Option<TenantPreload>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
@@ -786,7 +778,7 @@
None => {
// Deprecated dev mode: load from local disk state instead of remote storage
// https://github.com/neondatabase/neon/issues/5624
return self.load_local(init_order, ctx).await;
return self.load_local(ctx).await;
}
};

@@ -881,7 +873,6 @@
&index_part.metadata,
Some(remote_timeline_client),
self.deletion_queue_client.clone(),
None,
)
.await
.context("resume_deletion")
@@ -1006,10 +997,6 @@
None
};

// we can load remote timelines during init, but they are assumed to be so rare that
// initialization order is not passed to here.
let init_order = None;

// timeline loading after attach expects to find metadata file for each metadata
save_metadata(
self.conf,
@@ -1027,7 +1014,6 @@
Some(index_part),
remote_metadata,
ancestor,
init_order,
ctx,
)
.await
@@ -1269,11 +1255,7 @@
/// files on disk. Used at pageserver startup.
///
/// No background tasks are started as part of this routine.
async fn load_local(
self: &Arc<Tenant>,
init_order: Option<InitializationOrder>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
async fn load_local(self: &Arc<Tenant>, ctx: &RequestContext) -> anyhow::Result<()> {
span::debug_assert_current_span_has_tenant_id();

debug!("loading tenant task");
@@ -1299,7 +1281,7 @@
// Process loadable timelines first
for (timeline_id, local_metadata) in scan.sorted_timelines_to_load {
if let Err(e) = self
.load_local_timeline(timeline_id, local_metadata, init_order.as_ref(), ctx, false)
.load_local_timeline(timeline_id, local_metadata, ctx, false)
.await
{
match e {
@@ -1333,13 +1315,7 @@
}
Some(local_metadata) => {
if let Err(e) = self
.load_local_timeline(
timeline_id,
local_metadata,
init_order.as_ref(),
ctx,
true,
)
.load_local_timeline(timeline_id, local_metadata, ctx, true)
.await
{
match e {
@@ -1367,12 +1343,11 @@
/// Subroutine of `load_tenant`, to load an individual timeline
///
/// NB: The parent is assumed to be already loaded!
#[instrument(skip(self, local_metadata, init_order, ctx))]
#[instrument(skip(self, local_metadata, ctx))]
async fn load_local_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
local_metadata: TimelineMetadata,
init_order: Option<&InitializationOrder>,
ctx: &RequestContext,
found_delete_mark: bool,
) -> Result<(), LoadLocalTimelineError> {
@@ -1389,7 +1364,6 @@
&local_metadata,
None,
self.deletion_queue_client.clone(),
init_order,
)
.await
.context("resume deletion")
@@ -1406,17 +1380,9 @@
None
};

self.timeline_init_and_sync(
timeline_id,
resources,
None,
local_metadata,
ancestor,
init_order,
ctx,
)
.await
.map_err(LoadLocalTimelineError::Load)
self.timeline_init_and_sync(timeline_id, resources, None, local_metadata, ancestor, ctx)
.await
.map_err(LoadLocalTimelineError::Load)
}

pub(crate) fn tenant_id(&self) -> TenantId {
@@ -2311,7 +2277,6 @@ impl Tenant {
new_metadata: &TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
resources: TimelineResources,
init_order: Option<&InitializationOrder>,
cause: CreateTimelineCause,
) -> anyhow::Result<Arc<Timeline>> {
let state = match cause {
@@ -2326,9 +2291,6 @@ impl Tenant {
CreateTimelineCause::Delete => TimelineState::Stopping,
};

let initial_logical_size_can_start = init_order.map(|x| &x.initial_logical_size_can_start);
let initial_logical_size_attempt = init_order.map(|x| &x.initial_logical_size_attempt);

let pg_version = new_metadata.pg_version();

let timeline = Timeline::new(
@@ -2342,8 +2304,6 @@ impl Tenant {
Arc::clone(&self.walredo_mgr),
resources,
pg_version,
initial_logical_size_can_start.cloned(),
initial_logical_size_attempt.cloned().flatten(),
state,
self.cancel.child_token(),
);
@@ -3165,7 +3125,6 @@ impl Tenant {
new_metadata,
ancestor,
resources,
None,
CreateTimelineCause::Load,
)
.context("Failed to create timeline data structure")?;
@@ -3837,7 +3796,7 @@ pub(crate) mod harness {
match mode {
LoadMode::Local => {
tenant
.load_local(None, ctx)
.load_local(ctx)
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
}
@@ -3847,7 +3806,7 @@ pub(crate) mod harness {
.instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
tenant
.attach(None, Some(preload), ctx)
.attach(Some(preload), ctx)
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
}
7 changes: 1 addition & 6 deletions pageserver/src/tenant/delete.rs
@@ -15,7 +15,6 @@ use crate::{
context::RequestContext,
task_mgr::{self, TaskKind},
tenant::mgr::{TenantSlot, TenantsMapRemoveResult},
InitializationOrder,
};

use super::{
@@ -390,7 +389,6 @@ impl DeleteTenantFlow {
tenant: &Arc<Tenant>,
preload: Option<TenantPreload>,
tenants: &'static std::sync::RwLock<TenantsMap>,
init_order: Option<InitializationOrder>,
ctx: &RequestContext,
) -> Result<(), DeleteTenantError> {
let (_, progress) = completion::channel();
@@ -400,10 +398,7 @@
.await
.expect("cant be stopping or broken");

tenant
.attach(init_order, preload, ctx)
.await
.context("attach")?;
tenant.attach(preload, ctx).await.context("attach")?;

Self::background(
guard,
22 changes: 0 additions & 22 deletions pageserver/src/tenant/timeline.rs
@@ -302,13 +302,6 @@ pub struct Timeline {

eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,

/// Barrier to wait before doing initial logical size calculation. Used only during startup.
initial_logical_size_can_start: Option<completion::Barrier>,

/// Completion shared between all timelines loaded during startup; used to delay heavier
/// background tasks until some logical sizes have been calculated.
initial_logical_size_attempt: Mutex<Option<completion::Completion>>,

/// Load or creation time information about the disk_consistent_lsn and when the loading
/// happened. Used for consumption metrics.
pub(crate) loaded_at: (Lsn, SystemTime),
@@ -1021,17 +1014,6 @@ impl Timeline {
error!("Not activating a Stopping timeline");
}
(_, new_state) => {
if matches!(
new_state,
TimelineState::Stopping | TimelineState::Broken { .. }
) {
// drop the completion guard, if any; it might be holding off the completion
// forever needlessly
self.initial_logical_size_attempt
.lock()
.unwrap_or_else(|e| e.into_inner())
.take();
}
self.state.send_replace(new_state);
}
}
@@ -1353,8 +1335,6 @@ impl Timeline {
walredo_mgr: Arc<super::WalRedoManager>,
resources: TimelineResources,
pg_version: u32,
initial_logical_size_can_start: Option<completion::Barrier>,
initial_logical_size_attempt: Option<completion::Completion>,
state: TimelineState,
cancel: CancellationToken,
) -> Arc<Self> {
@@ -1454,8 +1434,6 @@ impl Timeline {
),
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTimelineFlow::default())),

initial_logical_size_can_start,
initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
cancel,
gate: Gate::new(format!("Timeline<{tenant_shard_id}/{timeline_id}>")),

3 changes: 0 additions & 3 deletions pageserver/src/tenant/timeline/delete.rs
@@ -21,7 +21,6 @@ use crate::{
},
CreateTimelineCause, DeleteTimelineError, Tenant,
},
InitializationOrder,
};

use super::{Timeline, TimelineResources};
@@ -407,7 +406,6 @@ impl DeleteTimelineFlow {
local_metadata: &TimelineMetadata,
remote_client: Option<RemoteTimelineClient>,
deletion_queue_client: DeletionQueueClient,
init_order: Option<&InitializationOrder>,
) -> anyhow::Result<()> {
// Note: here we even skip populating layer map. Timeline is essentially uninitialized.
// RemoteTimelineClient is the only functioning part.
@@ -420,7 +418,6 @@ impl DeleteTimelineFlow {
remote_client,
deletion_queue_client,
},
init_order,
// Important. We dont pass ancestor above because it can be missing.
// Thus we need to skip the validation here.
CreateTimelineCause::Delete,
