From 224ba73e9028b7a99f8b1a8b5b276702597f1ef8 Mon Sep 17 00:00:00 2001
From: Cyrill Troxler <cyrilltroxler@gmail.com>
Date: Sun, 29 Sep 2024 11:24:37 +0200
Subject: [PATCH] fix: improve handling on scaledown failure

Instead of exiting the shim, we now mark our initialProcess as exited so
that it is killed and restarted by the CRI. This is much cleaner since
we no longer leave orphaned socket files lying around.
---
 zeropod/container.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/zeropod/container.go b/zeropod/container.go
index 71c7467..0f3cd19 100644
--- a/zeropod/container.go
+++ b/zeropod/container.go
@@ -138,10 +138,10 @@ func (c *Container) scheduleScaleDownIn(in time.Duration) error {
 		log.G(c.context).Info("scaling down after scale down duration is up")
 
 		if err := c.scaleDown(c.context); err != nil {
-			// checkpointing failed, this is currently unrecoverable, so we
-			// shutdown our shim and let containerd recreate it.
-			log.G(c.context).Fatalf("scale down failed: %s", err)
-			os.Exit(1)
+			// checkpointing failed, this is currently unrecoverable. We set our
+			// initialProcess as exited to make sure it's restarted
+			log.G(c.context).Errorf("scale down failed: %s", err)
+			c.initialProcess.SetExited(1)
 		}
 
 	})