From bf74797c6906cf9c47a71c13caeff8f771639258 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 27 Aug 2024 11:45:13 +0200 Subject: [PATCH 1/2] fix races in the HTTP attach API This is very similar to commit 3280da0500, we cannot check the state then unlock to then lock again and do the action. Everything must happen under one lock. To fix this move the code into the HTTPAttach function in libpod. The locking here is a bit weird because attach blocks for the lifetime of attach which can be very long so we must unlock before performing the attach. Fixes #23757 Signed-off-by: Paul Holzinger --- libpod/container_api.go | 28 +++++++++++++++----- pkg/api/handlers/compat/containers_attach.go | 18 ------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/libpod/container_api.go b/libpod/container_api.go index 8a309c4cf9..02e35199ba 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -40,7 +40,10 @@ func (c *Container) Init(ctx context.Context, recursive bool) error { return err } } + return c.initUnlocked(ctx, recursive) +} +func (c *Container) initUnlocked(ctx context.Context, recursive bool) error { if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateStopped, define.ContainerStateExited) { return fmt.Errorf("container %s has already been created in runtime: %w", c.ID(), define.ErrCtrStateInvalid) } @@ -342,25 +345,36 @@ func (c *Container) HTTPAttach(r *http.Request, w http.ResponseWriter, streams * close(hijackDone) }() + locked := false if !c.batched { + locked = true c.lock.Lock() + defer func() { + if locked { + c.lock.Unlock() + } + }() if err := c.syncContainer(); err != nil { - c.lock.Unlock() - return err } - // We are NOT holding the lock for the duration of the function.
- c.lock.Unlock() } - - if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) { - return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid) + // For Docker compatibility, we need to re-initialize containers in these states. + if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited, define.ContainerStateStopped) { + if err := c.initUnlocked(r.Context(), c.config.Pod != ""); err != nil { + return fmt.Errorf("preparing container %s for attach: %w", c.ID(), err) + } + } else if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) { + return fmt.Errorf("can only attach to created or running containers - currently in state %s: %w", c.state.State.String(), define.ErrCtrStateInvalid) } if !streamAttach && !streamLogs { return fmt.Errorf("must specify at least one of stream or logs: %w", define.ErrInvalidArg) } + // We are NOT holding the lock for the duration of the function. + locked = false + c.lock.Unlock() + logrus.Infof("Performing HTTP Hijack attach to container %s", c.ID()) c.newContainerEvent(events.Attach) diff --git a/pkg/api/handlers/compat/containers_attach.go b/pkg/api/handlers/compat/containers_attach.go index 301be134b1..6dacdb29ee 100644 --- a/pkg/api/handlers/compat/containers_attach.go +++ b/pkg/api/handlers/compat/containers_attach.go @@ -4,11 +4,9 @@ package compat import ( "errors" - "fmt" "net/http" "github.com/containers/podman/v5/libpod" - "github.com/containers/podman/v5/libpod/define" "github.com/containers/podman/v5/pkg/api/handlers/utils" "github.com/containers/podman/v5/pkg/api/server/idle" api "github.com/containers/podman/v5/pkg/api/types" @@ -79,22 +77,6 @@ func AttachContainer(w http.ResponseWriter, r *http.Request) { return } - state, err := ctr.State() - if err != nil { - utils.InternalServerError(w, err) - return - } - // For Docker compatibility, we need to re-initialize containers in these states.
- if state == define.ContainerStateConfigured || state == define.ContainerStateExited || state == define.ContainerStateStopped { - if err := ctr.Init(r.Context(), ctr.PodID() != ""); err != nil { - utils.Error(w, http.StatusConflict, fmt.Errorf("preparing container %s for attach: %w", ctr.ID(), err)) - return - } - } else if !(state == define.ContainerStateCreated || state == define.ContainerStateRunning) { - utils.InternalServerError(w, fmt.Errorf("can only attach to created or running containers - currently in state %s: %w", state.String(), define.ErrCtrStateInvalid)) - return - } - logErr := func(e error) { logrus.Errorf("Error attaching to container %s: %v", ctr.ID(), e) } From a89fef6e2a630bb6912bdc1f308623e68a35271c Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 27 Aug 2024 14:54:31 +0200 Subject: [PATCH 2/2] cleanup: add new --stopped-only option The podman container cleanup process runs asynchronous and by the time it gets the lock it is possible another podman process already did the cleanup and then did a new init() to start it again. If the cleanup process gets the lock there it will cause very weird things. This can be observed in the remote start API as CI flakes.
Fixes #23754 Signed-off-by: Paul Holzinger --- cmd/podman/containers/cleanup.go | 5 +++++ libpod/container_api.go | 9 +++++++-- libpod/pod_api.go | 4 ++-- pkg/domain/entities/containers.go | 1 + pkg/domain/infra/abi/containers.go | 4 ++-- pkg/specgenutil/util.go | 4 +++- 6 files changed, 20 insertions(+), 7 deletions(-) diff --git a/cmd/podman/containers/cleanup.go b/cmd/podman/containers/cleanup.go index db15c6d834..72c1feab53 100644 --- a/cmd/podman/containers/cleanup.go +++ b/cmd/podman/containers/cleanup.go @@ -53,6 +53,11 @@ func init() { flags.BoolVar(&cleanupOptions.Remove, "rm", false, "After cleanup, remove the container entirely") flags.BoolVar(&cleanupOptions.RemoveImage, "rmi", false, "After cleanup, remove the image entirely") + + stoppedOnlyFlag := "stopped-only" + flags.BoolVar(&cleanupOptions.StoppedOnly, stoppedOnlyFlag, false, "Only cleanup when the container is in the stopped state") + _ = flags.MarkHidden(stoppedOnlyFlag) + validate.AddLatestFlag(cleanupCommand, &cleanupOptions.Latest) } diff --git a/libpod/container_api.go b/libpod/container_api.go index 02e35199ba..2b5e305348 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -785,8 +785,10 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou } // Cleanup unmounts all mount points in container and cleans up container storage -// It also cleans up the network stack -func (c *Container) Cleanup(ctx context.Context) error { +// It also cleans up the network stack. +// onlyStopped is set by the podman container cleanup to ensure we only cleanup a stopped container, +// all other states mean another process already called cleanup before us which is fine in such cases.
+func (c *Container) Cleanup(ctx context.Context, onlyStopped bool) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -808,6 +810,9 @@ func (c *Container) Cleanup(ctx context.Context) error { if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) { return fmt.Errorf("container %s is running or paused, refusing to clean up: %w", c.ID(), define.ErrCtrStateInvalid) } + if onlyStopped && !c.ensureState(define.ContainerStateStopped) { + return fmt.Errorf("container %s is not stopped and only cleanup for a stopped container was requested: %w", c.ID(), define.ErrCtrStateInvalid) + } // if the container was not created in the oci runtime or was already cleaned up, then do nothing if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { diff --git a/libpod/pod_api.go b/libpod/pod_api.go index d385c56286..f0effef844 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -180,7 +180,7 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m } if cleanup { - err := c.Cleanup(ctx) + err := c.Cleanup(ctx, false) if err != nil && !errors.Is(err, define.ErrCtrStateInvalid) && !errors.Is(err, define.ErrCtrStopped) { return err } @@ -299,7 +299,7 @@ func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) { c := ctr logrus.Debugf("Adding parallel job to clean up container %s", c.ID()) retChan := parallel.Enqueue(ctx, func() error { - return c.Cleanup(ctx) + return c.Cleanup(ctx, false) }) ctrErrChan[c.ID()] = retChan diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 24c1d78f8a..105b091549 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -371,6 +371,7 @@ type ContainerCleanupOptions struct { Latest bool Remove bool RemoveImage bool + StoppedOnly bool // Only cleanup if the container is stopped, i.e.
was running before } // ContainerCleanupReport describes the response from a diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 1694d6ce79..f20c412858 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -313,7 +313,7 @@ func (ic *ContainerEngine) ContainerStop(ctx context.Context, namesOrIds []strin } } } else { - if err = c.Cleanup(ctx); err != nil { + if err = c.Cleanup(ctx, false); err != nil { // The container could still have been removed, as we unlocked // after we stopped it. if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) { @@ -1320,7 +1320,7 @@ func (ic *ContainerEngine) ContainerCleanup(ctx context.Context, namesOrIds []st report.RmErr = fmt.Errorf("failed to clean up and remove container %v: %w", ctr.ID(), err) } } else { - err := ctr.Cleanup(ctx) + err := ctr.Cleanup(ctx, options.StoppedOnly) // ignore error if ctr is removed or cannot be cleaned up, likely the ctr was already restarted by another process if err != nil && !errors.Is(err, define.ErrNoSuchCtr) && !errors.Is(err, define.ErrCtrStateInvalid) { report.CleanErr = fmt.Errorf("failed to clean up container %v: %w", ctr.ID(), err) diff --git a/pkg/specgenutil/util.go b/pkg/specgenutil/util.go index 71065997b0..222e50f934 100644 --- a/pkg/specgenutil/util.go +++ b/pkg/specgenutil/util.go @@ -311,7 +311,9 @@ func CreateExitCommandArgs(storageConfig storageTypes.StoreOptions, config *conf command = append(command, "--module", module) } - command = append(command, []string{"container", "cleanup"}...) + // --stopped-only is used to ensure we only cleanup stopped containers and do not race + // against other processes that did a cleanup() + init() again before we had the chance to run + command = append(command, []string{"container", "cleanup", "--stopped-only"}...) if rm { command = append(command, "--rm")