From 224ba73e9028b7a99f8b1a8b5b276702597f1ef8 Mon Sep 17 00:00:00 2001 From: Cyrill Troxler Date: Sun, 29 Sep 2024 11:24:37 +0200 Subject: [PATCH] fix: improve handling on scaledown failure Instead of exiting our shim, we just set our initialProcess as exited so it will be killed and restarted by CRI. This is way cleaner since we don't leave orphan socket files lying around. --- zeropod/container.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zeropod/container.go b/zeropod/container.go index 71c7467..0f3cd19 100644 --- a/zeropod/container.go +++ b/zeropod/container.go @@ -138,10 +138,10 @@ func (c *Container) scheduleScaleDownIn(in time.Duration) error { log.G(c.context).Info("scaling down after scale down duration is up") if err := c.scaleDown(c.context); err != nil { - // checkpointing failed, this is currently unrecoverable, so we - // shutdown our shim and let containerd recreate it. - log.G(c.context).Fatalf("scale down failed: %s", err) - os.Exit(1) + // checkpointing failed, this is currently unrecoverable. We set our + // initialProcess as exited to make sure it's restarted + log.G(c.context).Errorf("scale down failed: %s", err) + c.initialProcess.SetExited(1) } })