Skip to content

Commit

Permalink
Merge pull request #22658 from giuseppe/libpod-wait-for-healthy-on-ma…
Browse files Browse the repository at this point in the history
…in-thread

libpod: wait for healthy on main thread
  • Loading branch information
openshift-merge-bot[bot] authored May 16, 2024
2 parents d1dd720 + 35375e0 commit f7a3046
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 19 deletions.
22 changes: 20 additions & 2 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
}

// Start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Update updates the given container.
Expand Down Expand Up @@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts.Start = true
opts.Started = startedChan

// attach and start the container on a different thread. waitForHealthy must
// be done later, as it requires to run on the same thread that holds the lock
// for the container.
if err := c.ociRuntime.Attach(c, opts); err != nil {
attachChan <- err
}
Expand All @@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}

return attachChan, nil
return attachChan, c.waitForHealthy(ctx)
}

// RestartWithTimeout restarts a running container and takes a given timeout in uint
Expand Down Expand Up @@ -775,6 +781,14 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}
if len(wantedHealthStates) > 0 {
// even if we are interested only in the health check
// check that the container is still running to avoid
// waiting until the timeout expires.
state, err := c.State()
if err != nil {
trySend(-1, err)
return
}
status, err := c.HealthCheckStatus()
if err != nil {
trySend(-1, err)
Expand All @@ -784,6 +798,10 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
trySend(-1, nil)
return
}
if state != define.ContainerStateCreated && state != define.ContainerStateRunning && state != define.ContainerStatePaused {
trySend(-1, define.ErrCtrStopped)
return
}
}
select {
case <-ctx.Done():
Expand Down
32 changes: 23 additions & 9 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err
}
}
if err := c.start(ctx); err != nil {
if err := c.start(); err != nil {
return false, err
}
return true, nil
return true, c.waitForHealthy(ctx)
}

// Ensure that the container is in a specific state or state.
Expand Down Expand Up @@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {

// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == define.ContainerStateUnknown {
Expand Down Expand Up @@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
}

// Now start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Internal, non-locking function to start a container
func (c *Container) start(ctx context.Context) error {
func (c *Container) start() error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
Expand Down Expand Up @@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {

c.newContainerEvent(events.Start)

if err := c.save(); err != nil {
return err
}
return c.save()
}

// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
// for the container to get into the healthy state and reports the status to the notify socket.
// The function unlocks the container lock, so it must be called from the same thread that locks
// the container.
func (c *Container) waitForHealthy(ctx context.Context) error {
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
Expand All @@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
}

if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return err
}

Expand Down Expand Up @@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
}

// Internal, non-locking function to restart a container
// It requires to run on the same thread that holds the lock.
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
Expand Down Expand Up @@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
}
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// mountStorage sets up the container's root filesystem
Expand Down
4 changes: 2 additions & 2 deletions libpod/oci_conmon_attach_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
package libpod

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -32,6 +31,7 @@ const (
// Attach to the given container.
// Does not check if state is appropriate.
// started is only required if startContainer is true.
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging

Expand Down Expand Up @@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's
// done.
if params.Start {
if err := c.start(context.TODO()); err != nil {
if err := c.start(); err != nil {
return err
}
params.Started <- true
Expand Down
14 changes: 8 additions & 6 deletions pkg/domain/infra/abi/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
}

exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
if err2 != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
}
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
Expand Down Expand Up @@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
report.ExitCode = define.ExitCode(err)
return &report, err
}
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
if opts.Rm && !ctr.ShouldRestart(ctx) {
if err := removeContainer(ctr, false); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) ||
Expand All @@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
return &report, nil
}

func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
exitCode, err := ctr.Wait(ctx)
if err != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
intExitCode := int(define.ExecErrorCodeNotFound)
return intExitCode
return intExitCode, err
}
return int(exitCode)
return int(exitCode), nil
}

func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {
Expand Down
9 changes: 9 additions & 0 deletions test/system/260-sdnotify.bats
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
is "$output" "running" "make sure container is still running"

run_podman rm -f -t0 $ctr

ctr=$(random_string)
run_podman run --name $ctr \
--health-cmd="touch /terminate" \
--sdnotify=healthy \
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
is "$output" "finished" "make sure container exited successfully"
run_podman rm -f -t0 $ctr

_stop_socat
}

Expand Down

1 comment on commit f7a3046

@packit-as-a-service
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podman-next COPR build failed. @containers/packit-build please check.

Please sign in to comment.