Add podman system check for checking storage consistency
Add a `podman system check` that performs consistency checks on local
storage, optionally removing damaged items so that they can be
recreated.

Signed-off-by: Nalin Dahyabhai <[email protected]>
nalind committed Jun 4, 2024
1 parent c510959 commit fec58a4
Showing 15 changed files with 565 additions and 0 deletions.
138 changes: 138 additions & 0 deletions cmd/podman/system/check.go
@@ -0,0 +1,138 @@
package system

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/containers/common/pkg/completion"
	"github.com/containers/podman/v5/cmd/podman/registry"
	"github.com/containers/podman/v5/cmd/podman/validate"
	"github.com/containers/podman/v5/pkg/domain/entities/types"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/spf13/cobra"
)

var (
	checkOptions     = types.SystemCheckOptions{}
	checkDescription = `
	podman system check
	Check storage for consistency and remove anything that looks damaged
`

	checkCommand = &cobra.Command{
		Use:               "check [options]",
		Short:             "Check storage consistency",
		Args:              validate.NoArgs,
		Long:              checkDescription,
		RunE:              check,
		ValidArgsFunction: completion.AutocompleteNone,
		Example:           `podman system check`,
	}
)

func init() {
	registry.Commands = append(registry.Commands, registry.CliCommand{
		Command: checkCommand,
		Parent:  systemCmd,
	})
	flags := checkCommand.Flags()
	flags.BoolVarP(&checkOptions.Quick, "quick", "q", false, "Skip time-consuming checks. The default is to include time-consuming checks")
	flags.BoolVarP(&checkOptions.Repair, "repair", "r", false, "Remove inconsistent images")
	flags.BoolVarP(&checkOptions.RepairLossy, "force", "f", false, "Remove inconsistent images and containers")
	flags.DurationP("max", "m", 24*time.Hour, "Maximum allowed age of unreferenced layers")
	_ = checkCommand.RegisterFlagCompletionFunc("max", completion.AutocompleteNone)
}

func check(cmd *cobra.Command, args []string) error {
	flags := cmd.Flags()
	if flags.Changed("max") {
		maxAge, err := flags.GetDuration("max")
		if err != nil {
			return err
		}
		checkOptions.UnreferencedLayerMaximumAge = &maxAge
	}
	response, err := registry.ContainerEngine().SystemCheck(context.Background(), checkOptions)
	if err != nil {
		return err
	}

	if err = printSystemCheckResults(response); err != nil {
		return err
	}

	if !checkOptions.Repair && !checkOptions.RepairLossy && response.Errors {
		return errors.New("damage detected in local storage")
	}

	recheckOptions := checkOptions
	recheckOptions.Repair = false
	recheckOptions.RepairLossy = false
	if response, err = registry.ContainerEngine().SystemCheck(context.Background(), recheckOptions); err != nil {
		return err
	}
	if response.Errors {
		return errors.New("damage in local storage still present after repair attempt")
	}

	return nil
}

func printSystemCheckResults(report *types.SystemCheckReport) error {
	if !report.Errors {
		return nil
	}
	errorSlice := func(strs []string) []error {
		if strs == nil {
			return nil
		}
		errs := make([]error, len(strs))
		for i, s := range strs {
			errs[i] = errors.New(s)
		}
		return errs
	}
	for damagedLayer, errorsSlice := range report.Layers {
		merr := multierror.Append(nil, errorSlice(errorsSlice)...)
		if err := merr.ErrorOrNil(); err != nil {
			fmt.Printf("Damaged layer %s:\n%s", damagedLayer, err)
		}
	}
	for _, removedLayer := range report.RemovedLayers {
		fmt.Printf("Deleted damaged layer: %s\n", removedLayer)
	}
	for damagedROLayer, errorsSlice := range report.ROLayers {
		merr := multierror.Append(nil, errorSlice(errorsSlice)...)
		if err := merr.ErrorOrNil(); err != nil {
			fmt.Printf("Damaged read-only layer %s:\n%s", damagedROLayer, err)
		}
	}
	for damagedImage, errorsSlice := range report.Images {
		merr := multierror.Append(nil, errorSlice(errorsSlice)...)
		if err := merr.ErrorOrNil(); err != nil {
			fmt.Printf("Damaged image %s:\n%s", damagedImage, err)
		}
	}
	for removedImage := range report.RemovedImages {
		fmt.Printf("Deleted damaged image: %s\n", removedImage)
	}
	for damagedROImage, errorsSlice := range report.ROImages {
		merr := multierror.Append(nil, errorSlice(errorsSlice)...)
		if err := merr.ErrorOrNil(); err != nil {
			fmt.Printf("Damaged read-only image %s:\n%s", damagedROImage, err)
		}
	}
	for damagedContainer, errorsSlice := range report.Containers {
		merr := multierror.Append(nil, errorSlice(errorsSlice)...)
		if err := merr.ErrorOrNil(); err != nil {
			fmt.Printf("Damaged container %s:\n%s", damagedContainer, err)
		}
	}
	for removedContainer := range report.RemovedContainers {
		fmt.Printf("Deleted damaged container: %s\n", removedContainer)
	}
	return nil
}
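
The `types.SystemCheckOptions` and `types.SystemCheckReport` values that check.go passes around are declared in `pkg/domain/entities/types`, one of the 15 changed files not expanded on this page. The sketch below is inferred purely from how the code above reads and writes their fields; the real declarations may carry JSON tags or extra fields not visible in this excerpt.

```go
// Inferred shape only -- the actual declarations live in
// pkg/domain/entities/types and are not shown in this excerpt.
package types

import "time"

type SystemCheckOptions struct {
	Quick                       bool           // --quick: skip time-consuming checks
	Repair                      bool           // --repair: remove damaged images
	RepairLossy                 bool           // --force: also remove dependent containers
	UnreferencedLayerMaximumAge *time.Duration // --max: age cutoff for unreferenced layers
}

type SystemCheckReport struct {
	Errors            bool                // true when any damage was detected
	Layers            map[string][]string // layer ID -> error descriptions
	ROLayers          map[string][]string // read-only layer ID -> error descriptions
	Images            map[string][]string // image ID -> error descriptions
	ROImages          map[string][]string // read-only image ID -> error descriptions
	Containers        map[string][]string // container ID -> error descriptions
	RemovedLayers     []string            // IDs of deleted layers
	RemovedImages     map[string][]string // removed image ID -> names it carried
	RemovedContainers map[string]string   // removed container ID -> its name
}
```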
59 changes: 59 additions & 0 deletions docs/source/markdown/podman-system-check.1.md
@@ -0,0 +1,59 @@
% podman-system-check 1

## NAME
podman\-system\-check - Perform consistency checks on image and container storage

## SYNOPSIS
**podman system check** [*options*]

## DESCRIPTION
Perform consistency checks on image and container storage, reporting images and
containers which have identified issues.

## OPTIONS

#### **--force**, **-f**

When attempting to remove damaged images, also remove containers which depend
on those images. By default, damaged images which are being used by containers
are left alone.

Containers which depend on damaged images do so regardless of which engine
created them, but because podman only "knows" how to shut down containers that
it started, the effect on still-running containers which were started by other
engines is difficult to predict.

#### **--max**, **-m**=*duration*

When considering layers which are not used by any images or containers, assume
that any layers which are more than *duration* old are the results of canceled
attempts to pull images, and should be treated as though they are damaged.

#### **--quick**, **-q**

Skip checks which are known to be time-consuming. This will prevent some types
of errors from being detected.

#### **--repair**, **-r**

Remove any images which are determined to have been damaged in some way, unless
they are in use by containers. Use **--force** to remove containers which
depend on damaged images, and those damaged images, as well.

## EXAMPLE

A reasonably quick check:
```
podman system check --quick --repair --force
```

A more thorough check:
```
podman system check --repair --max=1h --force
```
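
A report-only check, which removes nothing and exits with a non-zero status if any damage is found:
```
podman system check
```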

## SEE ALSO
**[podman(1)](podman.1.md)**, **[podman-system(1)](podman-system.1.md)**

## HISTORY
April 2024
1 change: 1 addition & 0 deletions docs/source/markdown/podman-system.1.md
@@ -13,6 +13,7 @@ The system command allows management of the podman systems

| Command | Man Page | Description |
| ------- | ------------------------------------------------------------ | ------------------------------------------------------------------------ |
| check | [podman-system-check(1)](podman-system-check.1.md) | Perform consistency checks on image and container storage. |
| connection | [podman-system-connection(1)](podman-system-connection.1.md) | Manage the destination(s) for Podman service(s) |
| df | [podman-system-df(1)](podman-system-df.1.md) | Show podman disk usage. |
| events | [podman-events(1)](podman-events.1.md) | Monitor Podman events |
133 changes: 133 additions & 0 deletions libpod/runtime.go
@@ -31,6 +31,7 @@ import (
	"github.com/containers/podman/v5/libpod/lock"
	"github.com/containers/podman/v5/libpod/plugin"
	"github.com/containers/podman/v5/libpod/shutdown"
	"github.com/containers/podman/v5/pkg/domain/entities"
	"github.com/containers/podman/v5/pkg/rootless"
	"github.com/containers/podman/v5/pkg/systemd"
	"github.com/containers/podman/v5/pkg/util"
@@ -39,9 +40,11 @@
	"github.com/containers/storage/pkg/lockfile"
	"github.com/containers/storage/pkg/unshare"
	"github.com/docker/docker/pkg/namesgenerator"
	"github.com/hashicorp/go-multierror"
	jsoniter "github.com/json-iterator/go"
	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"golang.org/x/exp/slices"
)

// Set up the JSON library for all of Libpod
@@ -1249,3 +1252,133 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) {

	return toReturn, locksHeld, nil
}

// SystemCheck checks our storage for consistency, and depending on the options
// specified, will attempt to remove anything which fails consistency checks.
func (r *Runtime) SystemCheck(ctx context.Context, options entities.SystemCheckOptions) (entities.SystemCheckReport, error) {
	what := storage.CheckEverything()
	if options.Quick {
		what = storage.CheckMost()
	}
	if options.UnreferencedLayerMaximumAge != nil {
		tmp := *options.UnreferencedLayerMaximumAge
		what.LayerUnreferencedMaximumAge = &tmp
	}
	storageReport, err := r.store.Check(what)
	if err != nil {
		return entities.SystemCheckReport{}, err
	}
	if len(storageReport.Containers) == 0 &&
		len(storageReport.Layers) == 0 &&
		len(storageReport.ROLayers) == 0 &&
		len(storageReport.Images) == 0 &&
		len(storageReport.ROImages) == 0 {
		// no errors detected
		return entities.SystemCheckReport{}, nil
	}
	mapErrorSlicesToStringSlices := func(m map[string][]error) map[string][]string {
		if len(m) == 0 {
			return nil
		}
		mapped := make(map[string][]string, len(m))
		for k, errs := range m {
			strs := make([]string, len(errs))
			for i, e := range errs {
				strs[i] = e.Error()
			}
			mapped[k] = strs
		}
		return mapped
	}

	report := entities.SystemCheckReport{
		Errors:     true,
		Layers:     mapErrorSlicesToStringSlices(storageReport.Layers),
		ROLayers:   mapErrorSlicesToStringSlices(storageReport.ROLayers),
		Images:     mapErrorSlicesToStringSlices(storageReport.Images),
		ROImages:   mapErrorSlicesToStringSlices(storageReport.ROImages),
		Containers: mapErrorSlicesToStringSlices(storageReport.Containers),
	}
	if !options.Repair && report.Errors {
		// errors detected, no corrective measures to be taken
		return report, err
	}

	// get a list of images that we knew of before we tried to clean up any
	// that were damaged
	imagesBefore, err := r.store.Images()
	if err != nil {
		return report, fmt.Errorf("getting a list of images before attempting repairs: %w", err)
	}

	repairOptions := storage.RepairOptions{
		RemoveContainers: options.RepairLossy,
	}
	var containers []*Container
	if repairOptions.RemoveContainers {
		// build a list of the containers that we claim as ours that we
		// expect to be removing in a bit
		for containerID := range storageReport.Containers {
			ctr, lookupErr := r.state.LookupContainer(containerID)
			if lookupErr != nil {
				// we're about to remove it, so it's okay that
				// it isn't even one of ours
				continue
			}
			containers = append(containers, ctr)
		}
	}

	// run the cleanup
	merr := multierror.Append(nil, r.store.Repair(storageReport, &repairOptions)...)

	if repairOptions.RemoveContainers {
		// get the list of containers that storage will still admit to knowing about
		containersAfter, err := r.store.Containers()
		if err != nil {
			merr = multierror.Append(merr, fmt.Errorf("getting a list of containers after attempting repairs: %w", err))
		}
		for _, ctr := range containers {
			// if one of our containers that we tried to remove is
			// still on disk, report an error
			if slices.IndexFunc(containersAfter, func(containerAfter storage.Container) bool {
				return containerAfter.ID == ctr.ID()
			}) != -1 {
				merr = multierror.Append(merr, fmt.Errorf("clearing storage for container %s: %w", ctr.ID(), err))
				continue
			}
			// remove the container from our database
			if removeErr := r.state.RemoveContainer(ctr); removeErr != nil {
				merr = multierror.Append(merr, fmt.Errorf("updating state database to reflect removal of container %s: %w", ctr.ID(), removeErr))
				continue
			}
			if report.RemovedContainers == nil {
				report.RemovedContainers = make(map[string]string)
			}
			report.RemovedContainers[ctr.ID()] = ctr.config.Name
		}
	}

	// get a list of images that are still around after we clean up any
	// that were damaged
	imagesAfter, err := r.store.Images()
	if err != nil {
		merr = multierror.Append(merr, fmt.Errorf("getting a list of images after attempting repairs: %w", err))
	}
	for _, imageBefore := range imagesBefore {
		if slices.IndexFunc(imagesAfter, func(imageAfter storage.Image) bool {
			return imageAfter.ID == imageBefore.ID
		}) == -1 {
			if report.RemovedImages == nil {
				report.RemovedImages = make(map[string][]string)
			}
			report.RemovedImages[imageBefore.ID] = slices.Clone(imageBefore.Names)
		}
	}

	if merr != nil {
		err = merr.ErrorOrNil()
	}

	return report, err
}
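
For readers unfamiliar with the underlying containers/storage API, here is a trimmed-down sketch of the check-and-repair flow that `SystemCheck` wraps. It uses only the calls visible in the function above (`CheckEverything`, `CheckMost`, `Store.Check`, `Store.Repair`) and assumes an already-opened `storage.Store`; it is an illustration, not part of the commit.

```go
// A minimal sketch of the check-and-repair flow wrapped by SystemCheck.
// Opening the store is out of scope here; the caller supplies one.
package storagecheck

import (
	"fmt"
	"time"

	"github.com/containers/storage"
)

func checkAndRepair(store storage.Store, quick bool, maxAge *time.Duration) error {
	// choose which checks to run, exactly as SystemCheck does
	what := storage.CheckEverything()
	if quick {
		what = storage.CheckMost()
	}
	if maxAge != nil {
		// treat unreferenced layers older than maxAge as damaged
		what.LayerUnreferencedMaximumAge = maxAge
	}
	report, err := store.Check(what)
	if err != nil {
		return err
	}
	// attempt repairs without touching containers (no --force semantics)
	for _, repairErr := range store.Repair(report, &storage.RepairOptions{RemoveContainers: false}) {
		fmt.Println("repair:", repairErr)
	}
	return nil
}
```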
