From 84b039e615e9e7391e22e97fc5ee306cac29385b Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Mon, 8 Jul 2024 19:54:02 +0200 Subject: [PATCH] compute_ctl: Use 'fast' shutdown for Postgres termination (#8289) ## Problem We currently use 'immediate' mode in the most commonly used shutdown path, when the control plane calls a `compute_ctl` API to terminate Postgres inside compute without waiting for the actual pod / VM termination. Yet, 'immediate' shutdown doesn't create a shutdown checkpoint and ROs have bad times figuring out the list of running xacts during next start. ## Summary of changes Use 'fast' mode, which creates a shutdown checkpoint that is important for ROs to get a list of running xacts faster instead of going through the CLOG. On the control plane side, we poll this `compute_ctl` termination API for 10s, it should be enough as we don't really write any data at checkpoint time. If it times out, we anyway switch to the slow k8s-based termination. See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals. The default VM shutdown hook already uses `fast` mode, see [1] [1] https://github.com/neondatabase/neon/blob/c9fd8d76937c2031fd4fea1cdf661d6cf4f00dc3/vm-image-spec.yaml#L30-L31 Related to #6211 --- compute_tools/src/compute.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 41a52ef5b641..1fa2b9f71d64 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1386,7 +1386,9 @@ pub fn forward_termination_signal() { let pg_pid = PG_PID.load(Ordering::SeqCst); if pg_pid != 0 { let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32); - // use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html - kill(pg_pid, Signal::SIGQUIT).ok(); + // Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for + // ROs to get a list of running xacts faster instead of going through the CLOG. + // See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals. + kill(pg_pid, Signal::SIGINT).ok(); } }