From 60ed9fa5530e77d0a55cd60e5e40ca588d437956 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 4 Dec 2024 14:17:13 +0000
Subject: [PATCH 1/4] storcon: clear generation_pageserver when detaching

---
 storage_controller/src/persistence.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs
index 14cc51240d10..7ca80c7dfeec 100644
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -636,6 +636,13 @@ impl Persistence {
             .into_boxed(),
         };
 
+        // Clear generation_pageserver if we are moving into a state where we won't have
+        // any attached pageservers.
+        let input_generation_pageserver = match input_placement_policy {
+            None | Some(PlacementPolicy::Attached(_)) => None,
+            Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) => Some(None),
+        };
+
         #[derive(AsChangeset)]
         #[diesel(table_name = crate::schema::tenant_shards)]
         struct ShardUpdate {
@@ -643,6 +650,7 @@ impl Persistence {
             placement_policy: Option<String>,
             config: Option<String>,
             scheduling_policy: Option<String>,
+            generation_pageserver: Option<Option<i64>>,
         }
 
         let update = ShardUpdate {
@@ -655,6 +663,7 @@ impl Persistence {
                 .map(|c| serde_json::to_string(&c).unwrap()),
             scheduling_policy: input_scheduling_policy
                 .map(|p| serde_json::to_string(&p).unwrap()),
+            generation_pageserver: input_generation_pageserver,
         };
 
         query.set(update).execute(conn)?;
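
An aside on the nested Option in this changeset: Diesel's derived AsChangeset skips any field whose value is None, so a single Option cannot express "set this column to NULL". The Option<Option<i64>> gives three distinct cases. The sketch below is a standalone illustration of that pattern, not part of the patch; ColumnChange and plan_update are illustrative names.

    // Standalone illustration of the Option<Option<i64>> pattern used by the
    // generation_pageserver field above. With Diesel's derived AsChangeset, a
    // None field is simply omitted from the UPDATE, so the second Option layer
    // is what allows the column to be set to NULL.

    #[derive(Debug, PartialEq)]
    enum ColumnChange {
        Skip,     // outer None: leave the stored value untouched
        SetNull,  // Some(None): overwrite the column with NULL
        Set(i64), // Some(Some(v)): overwrite the column with v
    }

    fn plan_update(generation_pageserver: Option<Option<i64>>) -> ColumnChange {
        match generation_pageserver {
            None => ColumnChange::Skip,
            Some(None) => ColumnChange::SetNull,
            Some(Some(node_id)) => ColumnChange::Set(node_id),
        }
    }

    fn main() {
        // Attached (or unchanged) placement policies leave the column alone...
        assert_eq!(plan_update(None), ColumnChange::Skip);
        // ...while Detached/Secondary clear it, as in the match in the patch.
        assert_eq!(plan_update(Some(None)), ColumnChange::SetNull);
        assert_eq!(plan_update(Some(Some(42))), ColumnChange::Set(42));
        println!("ok");
    }
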
From 3a3445be18657c54da306e01b8b3ccd2da47af06 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 4 Dec 2024 18:32:54 +0000
Subject: [PATCH 2/4] storcon: set scheduling policy Active when detaching

Closes: https://github.com/neondatabase/neon/issues/9957
---
 storage_controller/src/service.rs | 39 +++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 92ec58cb4d68..fa0e7bd8017d 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -513,6 +513,9 @@ struct ShardUpdate {
 
     /// If this is None, generation is not updated.
     generation: Option<Generation>,
+
+    /// If this is None, scheduling policy is not updated.
+    scheduling_policy: Option<ShardSchedulingPolicy>,
 }
 
 enum StopReconciliationsReason {
@@ -2376,6 +2379,23 @@ impl Service {
             }
         };
 
+        // Ordinarily we do not update scheduling policy, but when making major changes
+        // like detaching or demoting to secondary-only, we need to force the scheduling
+        // mode to Active, or the caller's expected outcome (detach it) will not happen.
+        let scheduling_policy = match req.config.mode {
+            LocationConfigMode::Detached | LocationConfigMode::Secondary => {
+                // Special case: when making major changes like detaching or demoting to secondary-only,
+                // we need to force the scheduling mode to Active, or nothing will happen.
+                Some(ShardSchedulingPolicy::Active)
+            }
+            LocationConfigMode::AttachedMulti
+            | LocationConfigMode::AttachedSingle
+            | LocationConfigMode::AttachedStale => {
+                // While attached, continue to respect whatever the existing scheduling mode is.
+                None
+            }
+        };
+
         let mut create = true;
         for (shard_id, shard) in tenants.range_mut(TenantShardId::tenant_range(tenant_id)) {
             // Saw an existing shard: this is not a creation
@@ -2401,6 +2421,7 @@ impl Service {
                     placement_policy: placement_policy.clone(),
                     tenant_config: req.config.tenant_conf.clone(),
                     generation: set_generation,
+                    scheduling_policy,
                 });
             }
 
@@ -2497,15 +2518,16 @@ impl Service {
             placement_policy,
             tenant_config,
             generation,
+            scheduling_policy,
         } in &updates
         {
             self.persistence
                 .update_tenant_shard(
                     TenantFilter::Shard(*tenant_shard_id),
                     Some(placement_policy.clone()),
                     Some(tenant_config.clone()),
                     *generation,
-                    None,
+                    *scheduling_policy,
                 )
                 .await?;
         }
@@ -2521,6 +2543,7 @@ impl Service {
             placement_policy,
             tenant_config,
             generation: update_generation,
+            scheduling_policy,
         } in updates
         {
             let Some(shard) = tenants.get_mut(&tenant_shard_id) else {
@@ -2539,6 +2562,10 @@ impl Service {
                 shard.generation = Some(generation);
             }
 
+            if let Some(scheduling_policy) = scheduling_policy {
+                shard.set_scheduling_policy(scheduling_policy);
+            }
+
             shard.schedule(scheduler, &mut schedule_context)?;
 
             let maybe_waiter = self.maybe_reconcile_shard(shard, nodes);
@@ -2992,9 +3019,17 @@ impl Service {
 
         let TenantPolicyRequest {
             placement,
-            scheduling,
+            mut scheduling,
        } = req;
 
+        if let Some(PlacementPolicy::Detached | PlacementPolicy::Secondary) = placement {
+            // When someone configures a tenant to detach, we force the scheduling policy to enable
+            // this to take effect.
+            if scheduling.is_none() {
+                scheduling = Some(ShardSchedulingPolicy::Active);
+            }
+        }
+
         self.persistence
             .update_tenant_shard(
                 TenantFilter::Tenant(tenant_id),
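
Both code paths patched above — the location-config handler and the tenant policy update — apply the same rule: a move away from an attached mode forces the scheduling policy back to Active so the detach is actually reconciled. Below is a standalone sketch of that decision table with simplified stand-in types (Mode, SchedulingPolicy, and forced_scheduling_policy are illustrative names; the real ShardSchedulingPolicy also includes at least Pause).

    // Standalone model of the rule this patch applies in two places: moving to
    // Detached or Secondary overwrites the stored scheduling policy with Active,
    // while attached-mode reconfigurations leave it untouched.

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Mode {
        Detached,
        Secondary,
        AttachedSingle,
        AttachedMulti,
        AttachedStale,
    }

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum SchedulingPolicy {
        Active,
        Stop,
    }

    /// Some(policy): overwrite the stored scheduling policy; None: leave it alone.
    fn forced_scheduling_policy(mode: Mode) -> Option<SchedulingPolicy> {
        match mode {
            // A detach/demote must not be blocked by a Pause/Stop policy left by an operator.
            Mode::Detached | Mode::Secondary => Some(SchedulingPolicy::Active),
            // Attached reconfigurations respect whatever policy is already set.
            Mode::AttachedSingle | Mode::AttachedMulti | Mode::AttachedStale => None,
        }
    }

    fn main() {
        // The scenario from issue #9957: a shard stuck in Stop still detaches...
        let stored = SchedulingPolicy::Stop;
        let forced = forced_scheduling_policy(Mode::Detached);
        assert_eq!(forced, Some(SchedulingPolicy::Active));
        assert_ne!(forced, Some(stored));

        // ...while attached-mode changes leave the operator's choice in place.
        assert_eq!(forced_scheduling_policy(Mode::AttachedSingle), None);
        assert_eq!(forced_scheduling_policy(Mode::AttachedMulti), None);
        assert_eq!(forced_scheduling_policy(Mode::AttachedStale), None);
        println!("ok");
    }
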
From d97286e87abbdac86636d1f6033c0c5c0e04f0d1 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 4 Dec 2024 13:58:50 +0000
Subject: [PATCH 3/4] tests: add test_storage_controller_detach_stopped

---
 .../regress/test_storage_controller.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index f878116d533d..9f74dcccb99e 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3230,3 +3230,55 @@ def has_hit_migration_failpoint():
         # Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
         env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
         raise
+
+
+@run_only_on_default_postgres("Postgres version makes no difference here")
+def test_storage_controller_detached_stopped(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    Test that detaching a tenant while it has scheduling policy set to Paused or Stop works
+    """
+
+    remote_storage_kind = s3_storage()
+    neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
+
+    neon_env_builder.num_pageservers = 1
+
+    env = neon_env_builder.init_configs()
+    env.start()
+    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
+
+    tenant_id = TenantId.generate()
+    env.storage_controller.tenant_create(
+        tenant_id,
+        shard_count=1,
+    )
+
+    assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1
+
+    # Disable scheduling: ordinarily this would prevent the tenant's configuration being
+    # reconciled to pageservers, but this should be overridden when detaching.
+    env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy.*")
+    env.storage_controller.tenant_policy_update(
+        tenant_id,
+        {"scheduling": "Stop"},
+    )
+
+    env.storage_controller.consistency_check()
+
+    # Detach the tenant
+    virtual_ps_http.tenant_location_conf(
+        tenant_id,
+        {
+            "mode": "Detached",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": None,
+        },
+    )
+
+    env.storage_controller.consistency_check()
+
+    # Confirm the detach happened
+    assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []

From 294a941f03c4b7675dae764b3065443a2d563bcb Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 6 Dec 2024 12:20:37 +0000
Subject: [PATCH 4/4] libs: add explanatory comment to ShardSchedulingPolicy

---
 libs/pageserver_api/src/controller_api.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index 9a5ebc95bdd3..6839ef69f592 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -245,6 +245,17 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
     }
 }
 
+/// Scheduling policy enables us to selectively disable some automatic actions that the
+/// controller performs on a tenant shard. This is only set to a non-default value by
+/// human intervention, and it is reset to the default value (Active) when the tenant's
+/// placement policy is modified away from Attached.
+///
+/// The typical use of a non-Active scheduling policy is one of:
+/// - Pinning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)
+/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)
+///
+/// If you're not sure which policy to use to pin a shard to its current location, you probably
+/// want Pause.
 #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
 pub enum ShardSchedulingPolicy {
     // Normal mode: the tenant's scheduled locations may be updated at will, including