Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storcon: automatically clear Pause/Stop scheduling policies to enable detaches #10011

Merged
merged 5 commits into from
Dec 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions libs/pageserver_api/src/controller_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,17 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
}
}

/// Scheduling policy enables us to selectively disable some automatic actions that the
/// controller performs on a tenant shard. This is only set to a non-default value by
/// human intervention, and it is reset to the default value (Active) when the tenant's
/// placement policy is modified away from Attached.
///
/// The typical use of a non-Active scheduling policy is one of:
/// - Pinnning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)
/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)
///
/// If you're not sure which policy to use to pin a shard to its current location, you probably
/// want Pause.
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum ShardSchedulingPolicy {
// Normal mode: the tenant's scheduled locations may be updated at will, including
Expand Down
9 changes: 9 additions & 0 deletions storage_controller/src/persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -636,13 +636,21 @@ impl Persistence {
.into_boxed(),
};

// Clear generation_pageserver if we are moving into a state where we won't have
// any attached pageservers.
let input_generation_pageserver = match input_placement_policy {
None | Some(PlacementPolicy::Attached(_)) => None,
Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) => Some(None),
};

#[derive(AsChangeset)]
#[diesel(table_name = crate::schema::tenant_shards)]
struct ShardUpdate {
generation: Option<i32>,
placement_policy: Option<String>,
config: Option<String>,
scheduling_policy: Option<String>,
generation_pageserver: Option<Option<i64>>,
}

let update = ShardUpdate {
Expand All @@ -655,6 +663,7 @@ impl Persistence {
.map(|c| serde_json::to_string(&c).unwrap()),
scheduling_policy: input_scheduling_policy
.map(|p| serde_json::to_string(&p).unwrap()),
generation_pageserver: input_generation_pageserver,
};

query.set(update).execute(conn)?;
Expand Down
39 changes: 37 additions & 2 deletions storage_controller/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,9 @@ struct ShardUpdate {

/// If this is None, generation is not updated.
generation: Option<Generation>,

/// If this is None, scheduling policy is not updated.
scheduling_policy: Option<ShardSchedulingPolicy>,
}

enum StopReconciliationsReason {
Expand Down Expand Up @@ -2376,6 +2379,23 @@ impl Service {
}
};

// Ordinarily we do not update scheduling policy, but when making major changes
// like detaching or demoting to secondary-only, we need to force the scheduling
// mode to Active, or the caller's expected outcome (detach it) will not happen.
let scheduling_policy = match req.config.mode {
LocationConfigMode::Detached | LocationConfigMode::Secondary => {
// Special case: when making major changes like detaching or demoting to secondary-only,
// we need to force the scheduling mode to Active, or nothing will happen.
Some(ShardSchedulingPolicy::Active)
}
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// While attached, continue to respect whatever the existing scheduling mode is.
None
}
};

let mut create = true;
for (shard_id, shard) in tenants.range_mut(TenantShardId::tenant_range(tenant_id)) {
// Saw an existing shard: this is not a creation
Expand All @@ -2401,6 +2421,7 @@ impl Service {
placement_policy: placement_policy.clone(),
tenant_config: req.config.tenant_conf.clone(),
generation: set_generation,
scheduling_policy,
});
}

Expand Down Expand Up @@ -2497,6 +2518,7 @@ impl Service {
placement_policy,
tenant_config,
generation,
scheduling_policy,
} in &updates
{
self.persistence
Expand All @@ -2505,7 +2527,7 @@ impl Service {
Some(placement_policy.clone()),
Some(tenant_config.clone()),
*generation,
None,
*scheduling_policy,
)
.await?;
}
Expand All @@ -2521,6 +2543,7 @@ impl Service {
placement_policy,
tenant_config,
generation: update_generation,
scheduling_policy,
} in updates
{
let Some(shard) = tenants.get_mut(&tenant_shard_id) else {
Expand All @@ -2539,6 +2562,10 @@ impl Service {
shard.generation = Some(generation);
}

if let Some(scheduling_policy) = scheduling_policy {
shard.set_scheduling_policy(scheduling_policy);
}

shard.schedule(scheduler, &mut schedule_context)?;

let maybe_waiter = self.maybe_reconcile_shard(shard, nodes);
Expand Down Expand Up @@ -2992,9 +3019,17 @@ impl Service {

let TenantPolicyRequest {
placement,
scheduling,
mut scheduling,
} = req;

if let Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) = placement {
// When someone configures a tenant to detach, we force the scheduling policy to enable
// this to take effect.
if scheduling.is_none() {
scheduling = Some(ShardSchedulingPolicy::Active);
}
}

self.persistence
.update_tenant_shard(
TenantFilter::Tenant(tenant_id),
Expand Down
52 changes: 52 additions & 0 deletions test_runner/regress/test_storage_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -3230,3 +3230,55 @@ def has_hit_migration_failpoint():
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise


@run_only_on_default_postgres("Postgres version makes no difference here")
def test_storage_controller_detached_stopped(
neon_env_builder: NeonEnvBuilder,
):
"""
Test that detaching a tenant while it has scheduling policy set to Paused or Stop works
"""

remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)

neon_env_builder.num_pageservers = 1

env = neon_env_builder.init_configs()
env.start()
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

tenant_id = TenantId.generate()
env.storage_controller.tenant_create(
tenant_id,
shard_count=1,
)

assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1

# Disable scheduling: ordinarily this would prevent the tenant's configuration being
# reconciled to pageservers, but this should be overridden when detaching.
env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy.*")
env.storage_controller.tenant_policy_update(
tenant_id,
{"scheduling": "Stop"},
)

env.storage_controller.consistency_check()

# Detach the tenant
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)

env.storage_controller.consistency_check()

# Confirm the detach happened
assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []
Loading