From 3215d5124fab6a76efac8c667d82b946c136ee70 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 8 Oct 2024 17:30:08 +0200 Subject: [PATCH 1/2] podman-remote run: improve how we get the exit code Call the wait endpoint right away when a container is started and not only when attach is done, this allows us for wait to work when the container has been removed otherwise (i.e. podman-remote run --rm). In that case it was possible that wait failed and we then fall back to reading events. However based on some reports there seems to be the chance that the event readin is not working for them either and returns a bad error "Cannot get exit code: " which does not help anybody. Signed-off-by: Paul Holzinger --- pkg/domain/infra/tunnel/containers.go | 91 +++++++-------------------- pkg/domain/infra/tunnel/events.go | 31 --------- 2 files changed, 23 insertions(+), 99 deletions(-) diff --git a/pkg/domain/infra/tunnel/containers.go b/pkg/domain/infra/tunnel/containers.go index 6e8c6fea44..89c4486735 100644 --- a/pkg/domain/infra/tunnel/containers.go +++ b/pkg/domain/infra/tunnel/containers.go @@ -10,13 +10,11 @@ import ( "reflect" "strconv" "strings" - "sync" "time" "github.com/containers/common/pkg/config" "github.com/containers/image/v5/docker/reference" "github.com/containers/podman/v5/libpod/define" - "github.com/containers/podman/v5/libpod/events" "github.com/containers/podman/v5/pkg/api/handlers" "github.com/containers/podman/v5/pkg/bindings" "github.com/containers/podman/v5/pkg/bindings/containers" @@ -647,7 +645,7 @@ func (ic *ContainerEngine) ContainerExecDetached(ctx context.Context, nameOrID s return sessionID, nil } -func startAndAttach(ic *ContainerEngine, name string, detachKeys *string, sigProxy bool, input, output, errput *os.File) error { +func startAndAttach(ic *ContainerEngine, name string, detachKeys *string, sigProxy bool, input, output, errput *os.File) (int, error) { if output == nil && errput == nil { fmt.Printf("%s\n", name) } @@ -671,6 +669,10 @@ func startAndAttach(ic *ContainerEngine, name string, detachKeys *string, sigPro }() // Wait for the attach to actually happen before starting // the container. + + cancelCtx, cancel := context.WithCancel(ic.ClientCtx) + defer cancel() + var code int select { case <-attachReady: startOptions := new(containers.StartOptions) @@ -678,13 +680,21 @@ func startAndAttach(ic *ContainerEngine, name string, detachKeys *string, sigPro startOptions.WithDetachKeys(*dk) } if err := containers.Start(ic.ClientCtx, name, startOptions); err != nil { - return err + return -1, err } + + // call wait immediately after start to avoid racing against container removal when it was created with --rm + exitCode, err := containers.Wait(cancelCtx, name, nil) + if err != nil { + return -1, err + } + code = int(exitCode) + case err := <-attachErr: - return err + return -1, err } // If attachReady happens first, wait for containers.Attach to complete - return <-attachErr + return code, <-attachErr } func logIfRmError(id string, err error, reports []*reports.RmReport) { @@ -742,7 +752,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri } ctrRunning := ctr.State == define.ContainerStateRunning.String() if options.Attach { - err = startAndAttach(ic, name, &options.DetachKeys, options.SigProxy, options.Stdin, options.Stdout, options.Stderr) + code, err := startAndAttach(ic, name, &options.DetachKeys, options.SigProxy, options.Stdin, options.Stdout, options.Stderr) if err == define.ErrDetach { // User manually detached // Exit cleanly immediately @@ -780,19 +790,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri }() } - exitCode, err := containers.Wait(ic.ClientCtx, name, nil) - if err == define.ErrNoSuchCtr { - // Check events - event, err := ic.GetLastContainerEvent(ctx, name, events.Exited) - if err != nil { - logrus.Errorf("Cannot get exit code: %v", err) - report.ExitCode = define.ExecErrorCodeNotFound - } else { - report.ExitCode = *event.ContainerExitCode - } - } else { - report.ExitCode = int(exitCode) - } + report.ExitCode = code reports = append(reports, &report) return reports, nil } @@ -904,7 +902,9 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta return err }) } - if err := startAndAttach(ic, con.ID, &opts.DetachKeys, opts.SigProxy, opts.InputStream, opts.OutputStream, opts.ErrorStream); err != nil { + + code, err := startAndAttach(ic, con.ID, &opts.DetachKeys, opts.SigProxy, opts.InputStream, opts.OutputStream, opts.ErrorStream) + if err != nil { if err == define.ErrDetach { return &report, nil } @@ -932,53 +932,8 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta }() } - // Wait - exitCode, waitErr := containers.Wait(ic.ClientCtx, con.ID, nil) - if waitErr == nil { - report.ExitCode = int(exitCode) - return &report, nil - } - - // Determine why the wait failed. If the container doesn't exist, - // consult the events. - if !errorhandling.Contains(waitErr, define.ErrNoSuchCtr) { - return &report, waitErr - } - - // Events - eventsChannel := make(chan *events.Event) - eventOptions := entities.EventsOptions{ - EventChan: eventsChannel, - Filter: []string{ - "type=container", - fmt.Sprintf("container=%s", con.ID), - fmt.Sprintf("event=%s", events.Exited), - }, - } - - var lastEvent *events.Event - var mutex sync.Mutex - mutex.Lock() - // Read the events. - go func() { - for e := range eventsChannel { - lastEvent = e - } - mutex.Unlock() - }() - - eventsErr := ic.Events(ctx, eventOptions) - - // Wait for all events to be read - mutex.Lock() - if eventsErr != nil || lastEvent == nil { - logrus.Errorf("Cannot get exit code: %v", err) - report.ExitCode = define.ExecErrorCodeNotFound - return &report, nil //nolint: nilerr - } - - report.ExitCode = *lastEvent.ContainerExitCode - return &report, err + report.ExitCode = code + return &report, nil } func (ic *ContainerEngine) Diff(ctx context.Context, namesOrIDs []string, opts entities.DiffOptions) (*entities.DiffReport, error) { diff --git a/pkg/domain/infra/tunnel/events.go b/pkg/domain/infra/tunnel/events.go index d2c0e62ef0..f893e9508c 100644 --- a/pkg/domain/infra/tunnel/events.go +++ b/pkg/domain/infra/tunnel/events.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "github.com/containers/podman/v5/libpod/events" "github.com/containers/podman/v5/pkg/bindings/system" "github.com/containers/podman/v5/pkg/domain/entities" ) @@ -31,33 +30,3 @@ func (ic *ContainerEngine) Events(ctx context.Context, opts entities.EventsOptio options := new(system.EventsOptions).WithFilters(filters).WithSince(opts.Since).WithStream(opts.Stream).WithUntil(opts.Until) return system.Events(ic.ClientCtx, binChan, nil, options) } - -// GetLastContainerEvent takes a container name or ID and an event status and returns -// the last occurrence of the container event. -func (ic *ContainerEngine) GetLastContainerEvent(ctx context.Context, nameOrID string, containerEvent events.Status) (*events.Event, error) { - // check to make sure the event.Status is valid - if _, err := events.StringToStatus(containerEvent.String()); err != nil { - return nil, err - } - var event events.Event - return &event, nil - - /* - FIXME: We need new bindings for this section - filters := []string{ - fmt.Sprintf("container=%s", nameOrID), - fmt.Sprintf("event=%s", containerEvent), - "type=container", - } - - containerEvents, err := system.GetEvents(ctx, entities.EventsOptions{Filter: filters}) - if err != nil { - return nil, err - } - if len(containerEvents) < 1 { - return nil, fmt.Errorf("%s not found: %w", containerEvent.String(), events.ErrEventNotFound) - } - // return the last element in the slice - return containerEvents[len(containerEvents)-1], nil - */ -} From b3829a29324f3618b18a2a2917d5c98de31eb275 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 8 Oct 2024 17:52:22 +0200 Subject: [PATCH 2/2] libpod API: make wait endpoint better against rm races In the common scenario of podman-remote run --rm the API is required to attach + start + wait to get exit code. This has the problem that the wait call races against the container removal from the cleanup process so it may not get the exit code back. However we keep the exit code around for longer than the container so we can just look it up in the endpoint. Of course this only works when we get a full id as param but podman-remote will do that. Signed-off-by: Paul Holzinger --- libpod/runtime.go | 4 ++++ pkg/api/handlers/utils/containers.go | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/libpod/runtime.go b/libpod/runtime.go index 1aea7b4fe6..1daf2855b2 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1393,3 +1393,7 @@ func (r *Runtime) SystemCheck(ctx context.Context, options entities.SystemCheckO return report, err } + +func (r *Runtime) GetContainerExitCode(id string) (int32, error) { + return r.state.GetContainerExitCode(id) +} diff --git a/pkg/api/handlers/utils/containers.go b/pkg/api/handlers/utils/containers.go index 62af4b45d1..07f8ca7f2f 100644 --- a/pkg/api/handlers/utils/containers.go +++ b/pkg/api/handlers/utils/containers.go @@ -130,6 +130,17 @@ func WaitContainerLibpod(w http.ResponseWriter, r *http.Request) { reports, err := containerEngine.ContainerWait(r.Context(), []string{name}, opts) if err != nil { if errors.Is(err, define.ErrNoSuchCtr) { + // Special case: In the common scenario of podman-remote run --rm + // the API is required to attach + start + wait to get exit code. + // This has the problem that the wait call races against the container + // removal from the cleanup process so it may not get the exit code back. + // However we keep the exit code around for longer than the container so + // we can just look it up here. Of course this only works when we get a + // full id as param but podman-remote will do that + if code, err := runtime.GetContainerExitCode(name); err == nil { + WriteResponse(w, http.StatusOK, strconv.Itoa(int(code))) + return + } ContainerNotFound(w, name, err) return }