storcon: automatically clear Pause/Stop scheduling policies to enable detaches (#10011)

## Problem

We saw a tenant get stuck: it had been put into Pause scheduling mode to
pin it to a pageserver, then left idle for a while, and when the control
plane later tried to detach it, the detach could not proceed because
scheduling was disabled.

Closes #9957

## Summary of changes

- When changing the placement policy to Detached or Secondary, reset the
scheduling policy to Active (a sketch of this rule follows the list).
- Add a test that exercises this behavior.
- When persisting tenant shards, set their `generation_pageserver` to
null if the placement policy is not Attached (this enables consistency
checks to work, and avoids leaving state in the DB that could be
confusing or misleading in the future).
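
A minimal sketch of that first rule, using simplified stand-ins for the controller's types (the real enums live in `libs/pageserver_api/src/controller_api.rs` and carry more variants and payloads; this is illustrative, not the actual implementation):

```rust
// Simplified stand-ins; variant sets trimmed to what this commit discusses.
#[derive(Clone, Copy, PartialEq, Debug)]
enum PlacementPolicy {
    Attached,
    Secondary,
    Detached,
}

#[derive(Clone, Copy, PartialEq, Debug)]
enum ShardSchedulingPolicy {
    Active,
    Pause,
    Stop,
}

/// When the placement policy moves away from Attached, force the scheduling
/// policy back to Active so the detach/demotion can actually be reconciled;
/// otherwise leave whatever the operator set in place.
fn scheduling_override(
    new_placement: PlacementPolicy,
    current: ShardSchedulingPolicy,
) -> ShardSchedulingPolicy {
    match new_placement {
        PlacementPolicy::Detached | PlacementPolicy::Secondary => ShardSchedulingPolicy::Active,
        PlacementPolicy::Attached => current,
    }
}

fn main() {
    // A shard pinned with Pause, then detached: the pin is cleared.
    assert_eq!(
        scheduling_override(PlacementPolicy::Detached, ShardSchedulingPolicy::Pause),
        ShardSchedulingPolicy::Active
    );
    // While it stays attached, the operator's Pause is respected.
    assert_eq!(
        scheduling_override(PlacementPolicy::Attached, ShardSchedulingPolicy::Pause),
        ShardSchedulingPolicy::Pause
    );
}
```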
jcsp authored Dec 7, 2024 · 1 parent 4d7111f · commit ec79087
Showing 4 changed files with 109 additions and 2 deletions.
11 changes: 11 additions & 0 deletions libs/pageserver_api/src/controller_api.rs
@@ -245,6 +245,17 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
}
}

/// Scheduling policy enables us to selectively disable some automatic actions that the
/// controller performs on a tenant shard. This is only set to a non-default value by
/// human intervention, and it is reset to the default value (Active) when the tenant's
/// placement policy is modified away from Attached.
///
/// The typical use of a non-Active scheduling policy is one of:
/// - Pinning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)
/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)
///
/// If you're not sure which policy to use to pin a shard to its current location, you probably
/// want Pause.
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum ShardSchedulingPolicy {
// Normal mode: the tenant's scheduled locations may be updated at will, including
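
The diff collapses the enum body. As a rough sketch, its shape is as follows; the Essential variant and the comment wording are assumptions filled in from context, since only Active, Pause, and Stop are named elsewhere in this commit:

```rust
pub enum ShardSchedulingPolicy {
    // Normal mode: the tenant's scheduled locations may be updated at will,
    // including for optimization.
    Active,
    // Assumed variant: only essential changes (e.g. responding to node
    // failures) are made; optimizations are disabled.
    Essential,
    // No rescheduling: the shard stays wherever it currently is, but its
    // current locations may still be reconciled.
    Pause,
    // Strictest mode: no rescheduling and no reconciliation at all.
    Stop,
}
```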
9 changes: 9 additions & 0 deletions storage_controller/src/persistence.rs
@@ -636,13 +636,21 @@ impl Persistence {
.into_boxed(),
};

// Clear generation_pageserver if we are moving into a state where we won't have
// any attached pageservers.
let input_generation_pageserver = match input_placement_policy {
None | Some(PlacementPolicy::Attached(_)) => None,
Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) => Some(None),
};

#[derive(AsChangeset)]
#[diesel(table_name = crate::schema::tenant_shards)]
struct ShardUpdate {
generation: Option<i32>,
placement_policy: Option<String>,
config: Option<String>,
scheduling_policy: Option<String>,
generation_pageserver: Option<Option<i64>>,
}

let update = ShardUpdate {
@@ -655,6 +663,7 @@ impl Persistence {
.map(|c| serde_json::to_string(&c).unwrap()),
scheduling_policy: input_scheduling_policy
.map(|p| serde_json::to_string(&p).unwrap()),
generation_pageserver: input_generation_pageserver,
};

query.set(update).execute(conn)?;
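
The `Option<Option<i64>>` type on `generation_pageserver` does double duty with Diesel's `AsChangeset`: the outer `Option` decides whether the column appears in the `UPDATE` at all (Diesel skips `None` fields by default), and the inner one is the SQL value. A self-contained sketch of the convention, with a hypothetical helper that is not part of the controller:

```rust
/// Hypothetical helper showing the meaning of each state of an
/// Option<Option<i64>> changeset field:
/// - None          => column omitted from the UPDATE entirely
/// - Some(None)    => column SET to NULL
/// - Some(Some(v)) => column SET to v
fn describe(field: Option<Option<i64>>) -> String {
    match field {
        None => "generation_pageserver unchanged".to_string(),
        Some(None) => "generation_pageserver = NULL".to_string(),
        Some(Some(node_id)) => format!("generation_pageserver = {node_id}"),
    }
}

fn main() {
    // Placement policy untouched, or still Attached: leave the column alone.
    assert_eq!(describe(None), "generation_pageserver unchanged");
    // Moving to Detached/Secondary: no attached pageserver, so write NULL.
    assert_eq!(describe(Some(None)), "generation_pageserver = NULL");
}
```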
39 changes: 37 additions & 2 deletions storage_controller/src/service.rs
@@ -513,6 +513,9 @@ struct ShardUpdate {

/// If this is None, generation is not updated.
generation: Option<Generation>,

/// If this is None, scheduling policy is not updated.
scheduling_policy: Option<ShardSchedulingPolicy>,
}

enum StopReconciliationsReason {
@@ -2376,6 +2379,23 @@ impl Service {
}
};

// Ordinarily we do not update scheduling policy, but when making major changes
// like detaching or demoting to secondary-only, we need to force the scheduling
// mode to Active, or the caller's expected outcome (detach it) will not happen.
let scheduling_policy = match req.config.mode {
LocationConfigMode::Detached | LocationConfigMode::Secondary => {
// Special case: when making major changes like detaching or demoting to secondary-only,
// we need to force the scheduling mode to Active, or nothing will happen.
Some(ShardSchedulingPolicy::Active)
}
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// While attached, continue to respect whatever the existing scheduling mode is.
None
}
};

let mut create = true;
for (shard_id, shard) in tenants.range_mut(TenantShardId::tenant_range(tenant_id)) {
// Saw an existing shard: this is not a creation
@@ -2401,6 +2421,7 @@
placement_policy: placement_policy.clone(),
tenant_config: req.config.tenant_conf.clone(),
generation: set_generation,
scheduling_policy,
});
}

@@ -2497,6 +2518,7 @@ impl Service {
placement_policy,
tenant_config,
generation,
scheduling_policy,
} in &updates
{
self.persistence
@@ -2505,7 +2527,7 @@ impl Service {
Some(placement_policy.clone()),
Some(tenant_config.clone()),
*generation,
- None,
+ *scheduling_policy,
)
.await?;
}
@@ -2521,6 +2543,7 @@ impl Service {
placement_policy,
tenant_config,
generation: update_generation,
scheduling_policy,
} in updates
{
let Some(shard) = tenants.get_mut(&tenant_shard_id) else {
@@ -2539,6 +2562,10 @@ impl Service {
shard.generation = Some(generation);
}

if let Some(scheduling_policy) = scheduling_policy {
shard.set_scheduling_policy(scheduling_policy);
}

shard.schedule(scheduler, &mut schedule_context)?;

let maybe_waiter = self.maybe_reconcile_shard(shard, nodes);
@@ -2992,9 +3019,17 @@ impl Service {

let TenantPolicyRequest {
placement,
- scheduling,
+ mut scheduling,
} = req;

if let Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) = placement {
// When someone configures a tenant to detach, we force the scheduling policy to enable
// this to take effect.
if scheduling.is_none() {
scheduling = Some(ShardSchedulingPolicy::Active);
}
}

self.persistence
.update_tenant_shard(
TenantFilter::Tenant(tenant_id),
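
The hunks above follow a persist-first ordering: every `ShardUpdate` is written to the database, and only then is the in-memory shard state mutated and rescheduled, so a controller restart can only observe state the database already recorded. A condensed, self-contained sketch of that shape (simplified, assumed types; the scheduling/reconcile steps are reduced to a comment):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum SchedulingPolicy {
    Active,
    Pause,
    Stop,
}

#[derive(Clone, Copy)]
struct ShardUpdate {
    generation: Option<u32>,
    scheduling_policy: Option<SchedulingPolicy>,
}

struct Shard {
    generation: Option<u32>,
    scheduling_policy: SchedulingPolicy,
}

fn apply_updates(db: &mut Vec<ShardUpdate>, shard: &mut Shard, updates: &[ShardUpdate]) {
    // Phase 1: persist every update before mutating in-memory state, so a
    // crash between the phases never leaves persisted state behind memory.
    for update in updates {
        db.push(*update);
    }
    // Phase 2: apply in memory. A None field means "leave unchanged",
    // mirroring the Option fields on the real ShardUpdate struct.
    for update in updates {
        if let Some(generation) = update.generation {
            shard.generation = Some(generation);
        }
        if let Some(policy) = update.scheduling_policy {
            shard.scheduling_policy = policy;
        }
        // (The real code then calls shard.schedule(..) and kicks off a
        // reconcile so pageservers converge on the new state.)
    }
}

fn main() {
    let mut db = Vec::new();
    let mut shard = Shard {
        generation: Some(1),
        scheduling_policy: SchedulingPolicy::Pause,
    };
    let updates = [ShardUpdate {
        generation: None,
        scheduling_policy: Some(SchedulingPolicy::Active),
    }];
    apply_updates(&mut db, &mut shard, &updates);
    assert_eq!(shard.scheduling_policy, SchedulingPolicy::Active);
    assert_eq!(db.len(), 1);
}
```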
52 changes: 52 additions & 0 deletions test_runner/regress/test_storage_controller.py
@@ -3230,3 +3230,55 @@ def has_hit_migration_failpoint():
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise


@run_only_on_default_postgres("Postgres version makes no difference here")
def test_storage_controller_detached_stopped(
neon_env_builder: NeonEnvBuilder,
):
"""
Test that detaching a tenant works while its scheduling policy is set to Pause or Stop
"""

remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)

neon_env_builder.num_pageservers = 1

env = neon_env_builder.init_configs()
env.start()
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

tenant_id = TenantId.generate()
env.storage_controller.tenant_create(
tenant_id,
shard_count=1,
)

assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1

# Disable scheduling: ordinarily this would prevent the tenant's configuration being
# reconciled to pageservers, but this should be overridden when detaching.
env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy.*")
env.storage_controller.tenant_policy_update(
tenant_id,
{"scheduling": "Stop"},
)

env.storage_controller.consistency_check()

# Detach the tenant
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)

env.storage_controller.consistency_check()

# Confirm the detach happened
assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []

1 comment on commit ec79087

@github-actions

7201 tests run: 6866 passed, 0 failed, 335 skipped (full report)


Flaky tests (3)

- Postgres 17
- Postgres 16
- Postgres 14

Code coverage* (full report)

  • functions: 31.4% (8332 of 26529 functions)
  • lines: 47.7% (65549 of 137366 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
ec79087 at 2024-12-07T15:12:15.778Z :recycle:
