From ae10a00992d398210e4b5dd4fb5bcbda43f700f3 Mon Sep 17 00:00:00 2001
From: Aaron Lehmann
Date: Thu, 9 May 2024 16:09:29 -0700
Subject: [PATCH] Avoid pushing base layers

When we detect a layer we pushed before, stub in an empty layer, with an
annotation containing the uncompressed layer DiffID it's replacing. On
pull, swap in the uncompressed layer digest so the actual layer in the
layer store will be used without trying to pull/use the empty layer.
---
 imagebuildah/stage_executor.go |   2 +
 pull.go                        | 108 ++++++++++++++++++++++
 push.go                        | 123 ++++++++++++++++++++++++++++++++-
 3 files changed, 231 insertions(+), 2 deletions(-)

diff --git a/imagebuildah/stage_executor.go b/imagebuildah/stage_executor.go
index 00f2a288107..047b6a7ab41 100644
--- a/imagebuildah/stage_executor.go
+++ b/imagebuildah/stage_executor.go
@@ -1972,6 +1972,7 @@ func (s *StageExecutor) pushCache(ctx context.Context, src, cacheKey string) err
 	for _, dest := range destList {
 		logrus.Debugf("trying to push cache to dest: %+v from src:%+v", dest, src)
 		options := buildah.PushOptions{
+			Logger:              s.executor.logger,
 			Compression:         s.executor.compression,
 			SignaturePolicyPath: s.executor.signaturePolicyPath,
 			Store:               s.executor.store,
@@ -2004,6 +2005,7 @@ func (s *StageExecutor) pullCache(ctx context.Context, cacheKey string) (referen
 	for _, src := range srcList {
 		logrus.Debugf("trying to pull cache from remote repo: %+v", src.DockerReference())
 		options := buildah.PullOptions{
+			Logger:              s.executor.logger,
 			SignaturePolicyPath: s.executor.signaturePolicyPath,
 			Store:               s.executor.store,
 			SystemContext:       s.executor.systemContext,
diff --git a/pull.go b/pull.go
index 343c61fba73..968ba06a76b 100644
--- a/pull.go
+++ b/pull.go
@@ -2,6 +2,7 @@ package buildah

 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"time"
@@ -9,13 +10,20 @@ import (
 	"github.com/containers/buildah/define"
 	"github.com/containers/common/libimage"
 	"github.com/containers/common/pkg/config"
+	"github.com/containers/image/v5/manifest"
 	"github.com/containers/image/v5/types"
 	encconfig "github.com/containers/ocicrypt/config"
 	"github.com/containers/storage"
+	digest "github.com/opencontainers/go-digest"
+	imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/sirupsen/logrus"
 )

 // PullOptions can be used to alter how an image is copied in from somewhere.
 type PullOptions struct {
+	// Logger receives the debug and info messages emitted while pulling.
+	// If nil, logrus.StandardLogger() is used.
+	Logger *logrus.Logger
 	// SignaturePolicyPath specifies an override location for the signature
 	// policy which should be used for verifying the new image as it is
 	// being written. Except in specific circumstances, no value should be
@@ -62,6 +70,18 @@ func Pull(ctx context.Context, imageName string, options PullOptions) (imageID s
 	libimageOptions.OciDecryptConfig = options.OciDecryptConfig
 	libimageOptions.AllTags = options.AllTags
 	libimageOptions.RetryDelay = &options.RetryDelay
+	logger := logrus.StandardLogger()
+	if options.Logger != nil {
+		logger = options.Logger
+	}
+	libimageOptions.SourceLookupReferenceFunc = func(ref types.ImageReference) (types.ImageReference, error) {
+		// Wrapping cannot fail; return a literal nil rather than whatever
+		// the enclosing function's err variable holds when this is called.
+		return substituteStubbedBlobsRef{
+			ImageReference: ref,
+			logger:         logger,
+		}, nil
+	}
 	libimageOptions.DestinationLookupReferenceFunc = cacheLookupReferenceFunc(options.BlobDirectory, types.PreserveOriginal)

 	if options.MaxRetries > 0 {
@@ -98,3 +118,91 @@ func Pull(ctx context.Context, imageName string, options PullOptions) (imageID s

 	return pulledImages[0].ID(), nil
 }
+
+// substituteStubbedBlobsRef wraps an image reference so that the image sources
+// it creates are wrapped in a recordPulledBlobsImageSource.
+type substituteStubbedBlobsRef struct {
+	types.ImageReference
+	logger *logrus.Logger
+}
+
+// NewImageSource opens the wrapped reference and wraps the resulting image
+// source in a recordPulledBlobsImageSource. On failure it returns nil and the
+// error from the wrapped reference.
+func (ref substituteStubbedBlobsRef) NewImageSource(ctx context.Context, sys *types.SystemContext) (types.ImageSource, error) {
+	src, err := ref.ImageReference.NewImageSource(ctx, sys)
+	if err != nil {
+		// Don't hand back a non-nil wrapper around an unusable source.
+		return nil, err
+	}
+	return recordPulledBlobsImageSource{ImageSource: src, logger: ref.logger}, nil
+}
+
+// recordPulledBlobsImageSource is an image source whose LayerInfosForCopy
+// substitutes the digest recorded in diffIDAnnotation for annotated layers.
+type recordPulledBlobsImageSource struct {
+	types.ImageSource
+	logger *logrus.Logger
+}
+
+// diffIDAnnotation is the layer annotation in which Push records the digest of
+// a layer it replaced with an empty stub layer.
+const diffIDAnnotation = "diffid"
+
+// LayerInfosForCopy returns the layers to copy for the image. Each manifest
+// layer annotated with diffIDAnnotation is reported as the annotated digest
+// with Size -1 and the uncompressed layer media type of the manifest format;
+// other layers are reported as they appear in the manifest. If no layer is
+// annotated, the wrapped source's LayerInfosForCopy result is returned.
+func (src recordPulledBlobsImageSource) LayerInfosForCopy(ctx context.Context, instanceDigest *digest.Digest) ([]types.BlobInfo, error) {
+	manifestBlob, manifestType, err := src.GetManifest(ctx, instanceDigest)
+	if err != nil {
+		return nil, fmt.Errorf("reading image manifest: %w", err)
+	}
+	if manifest.MIMETypeIsMultiImage(manifestType) {
+		return nil, errors.New("can't copy layers for a manifest list (shouldn't be attempted)")
+	}
+	man, err := manifest.FromBlob(manifestBlob, manifestType)
+	if err != nil {
+		return nil, fmt.Errorf("parsing image manifest: %w", err)
+	}
+
+	// The media type given to substituted layers; it stays empty for
+	// manifest types not listed here.
+	uncompressedLayerType := ""
+	switch manifestType {
+	case imgspecv1.MediaTypeImageManifest:
+		uncompressedLayerType = imgspecv1.MediaTypeImageLayer
+	case manifest.DockerV2Schema1MediaType, manifest.DockerV2Schema1SignedMediaType, manifest.DockerV2Schema2MediaType:
+		uncompressedLayerType = manifest.DockerV2SchemaLayerMediaTypeUncompressed
+	}
+
+	manifestLayers := man.LayerInfos()
+	layerInfos := make([]types.BlobInfo, 0, len(manifestLayers))
+	changed := false
+	for _, layerInfo := range manifestLayers {
+		src.logger.Debugf("layer digest: %s, annotations: %v", layerInfo.Digest.String(), layerInfo.Annotations)
+		diffID := layerInfo.Annotations[diffIDAnnotation]
+		if diffID == "" {
+			layerInfos = append(layerInfos, layerInfo.BlobInfo)
+			continue
+		}
+		src.logger.Debugf("using diffid %s", diffID)
+		diffIDDigest, err := digest.Parse(diffID)
+		if err != nil {
+			return nil, fmt.Errorf("parsing diffid %q: %w", diffID, err)
+		}
+		layerInfos = append(layerInfos, types.BlobInfo{
+			Digest:    diffIDDigest,
+			Size:      -1,
+			MediaType: uncompressedLayerType,
+		})
+		changed = true
+	}
+	if changed {
+		src.logger.Infof("Reusing existing layers on disk which were stubbed in cache push")
+		return layerInfos, nil
+	}
+
+	return src.ImageSource.LayerInfosForCopy(ctx, instanceDigest)
+}
diff --git a/push.go b/push.go
index 2e2b9498ae4..1ae9ea9efb7 100644
--- a/push.go
+++ b/push.go
@@ -1,15 +1,22 @@
 package buildah

 import (
+	"bytes"
 	"context"
+	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
+	"strings"
 	"time"

 	"github.com/containers/buildah/pkg/blobcache"
 	"github.com/containers/common/libimage"
+	"github.com/containers/image/v5/docker"
 	"github.com/containers/image/v5/docker/reference"
+	"github.com/containers/image/v5/image"
 	"github.com/containers/image/v5/manifest"
+	"github.com/containers/image/v5/pkg/blobinfocache"
 	"github.com/containers/image/v5/pkg/compression"
 	"github.com/containers/image/v5/transports"
 	"github.com/containers/image/v5/types"
@@ -17,6 +24,7 @@ import (
 	"github.com/containers/storage"
 	"github.com/containers/storage/pkg/archive"
 	digest "github.com/opencontainers/go-digest"
+	imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/sirupsen/logrus"
 )

@@ -40,6 +48,7 @@ func cacheLookupReferenceFunc(directory string, compress types.LayerCompression)

 // PushOptions can be used to alter how an image is copied somewhere.
 type PushOptions struct {
+	Logger *logrus.Logger
 	// Compression specifies the type of compression which is applied to
 	// layer blobs. The default is to not use compression, but
 	// archive.Gzip is recommended.
@@ -122,10 +131,28 @@ func Push(ctx context.Context, image string, dest types.ImageReference, options
 	}

 	compress := types.PreserveOriginal
-	if options.Compression == archive.Gzip {
+	if options.Compression == archive.Gzip || options.Compression == archive.Zstd {
 		compress = types.Compress
 	}
-	libimageOptions.SourceLookupReferenceFunc = cacheLookupReferenceFunc(options.BlobDirectory, compress)
+	// PushOptions.Logger is optional: fall back to the standard logger, as
+	// Pull does, instead of dereferencing a nil *logrus.Logger.
+	logger := options.Logger
+	if logger == nil {
+		logger = logrus.StandardLogger()
+	}
+	realBlobCache := cacheLookupReferenceFunc(options.BlobDirectory, compress)
+	libimageOptions.SourceLookupReferenceFunc = func(ref types.ImageReference) (types.ImageReference, error) {
+		logger.Debugf("Looking up source image %q %q", ref.Transport().Name(), ref.StringWithinTransport())
+		src, err := realBlobCache(ref)
+		if err != nil {
+			return nil, err
+		}
+		return stubbedBlobsImageReference{
+			ImageReference: src,
+			destRef:        dest,
+			logger:         logger,
+		}, nil
+	}

 	runtime, err := libimage.RuntimeFromStore(options.Store, &libimage.RuntimeOptions{SystemContext: options.SystemContext})
 	if err != nil {
@@ -153,3 +180,132 @@ func Push(ctx context.Context, image string, dest types.ImageReference, options

 	return ref, manifestDigest, nil
 }
+
+// stubbedBlobsImageReference wraps the source image reference used by Push so
+// that the image sources it creates are wrapped in a stubbedBlobsImageSource.
+type stubbedBlobsImageReference struct {
+	types.ImageReference
+	destRef types.ImageReference
+	logger  *logrus.Logger
+}
+
+// NewImageSource opens the wrapped reference and wraps the resulting image
+// source in a stubbedBlobsImageSource which consults the default blob info
+// cache for sys. On failure it returns nil and the error from the wrapped
+// reference.
+func (ref stubbedBlobsImageReference) NewImageSource(ctx context.Context, sys *types.SystemContext) (types.ImageSource, error) {
+	src, err := ref.ImageReference.NewImageSource(ctx, sys)
+	if err != nil {
+		// Don't hand back a non-nil wrapper around an unusable source.
+		return nil, err
+	}
+	return stubbedBlobsImageSource{
+		ImageSource: src,
+		destRef:     ref.destRef,
+		logger:      ref.logger,
+		cache:       blobinfocache.DefaultCache(sys),
+	}, nil
+}
+
+// stubbedBlobsImageSource is an image source which reports an empty stub layer
+// in place of each layer that the blob info cache has a known location for.
+type stubbedBlobsImageSource struct {
+	types.ImageSource
+	destRef types.ImageReference
+	logger  *logrus.Logger
+	cache   types.BlobInfoCache
+}
+
+// LayerInfosForCopy returns the layers to copy for the image. A layer for
+// which the blob info cache knows a location, either in the registry named by
+// the manifest's "org.opencontainers.image.base.name" annotation or in the
+// destination's registry, is replaced by the gzipped empty layer carrying the
+// original layer's digest in diffIDAnnotation. If nothing is replaced, the
+// wrapped source's layer list is returned unchanged.
+func (src stubbedBlobsImageSource) LayerInfosForCopy(ctx context.Context, instanceDigest *digest.Digest) ([]types.BlobInfo, error) {
+	infos, err := src.ImageSource.LayerInfosForCopy(ctx, instanceDigest)
+	if err != nil {
+		return nil, err
+	}
+	if infos == nil {
+		return nil, nil
+	}
+
+	manifestBlob, manifestType, err := src.GetManifest(ctx, instanceDigest)
+	if err != nil {
+		return nil, fmt.Errorf("reading image manifest: %w", err)
+	}
+	if manifest.MIMETypeIsMultiImage(manifestType) {
+		return nil, errors.New("can't copy layers for a manifest list (shouldn't be attempted)")
+	}
+
+	// Only the manifest's annotations are needed here.
+	var manifestStub struct {
+		Annotations map[string]string `json:"annotations"`
+	}
+	if err := json.Unmarshal(manifestBlob, &manifestStub); err != nil {
+		return nil, fmt.Errorf("parsing image manifest in LayerInfosForCopy: %w", err)
+	}
+
+	// Treat everything before the first "/" of the base image name as its
+	// registry; a name without a "/" yields no base image registry.
+	baseImageRegistry := ""
+	if baseImage, ok := manifestStub.Annotations["org.opencontainers.image.base.name"]; ok {
+		if registry, _, ok := strings.Cut(baseImage, "/"); ok {
+			baseImageRegistry = registry
+			src.logger.Debugf("found base image registry %s", baseImageRegistry)
+		}
+	}
+
+	// DockerReference may return nil (Push itself checks for that); don't
+	// pass a nil reference to reference.Domain, just skip the lookup.
+	destRegistry := ""
+	if named := src.destRef.DockerReference(); named != nil {
+		destRegistry = reference.Domain(named)
+	}
+
+	changed := false
+	updatedBlobInfos := make([]types.BlobInfo, 0, len(infos))
+	for _, layerBlob := range infos {
+		src.logger.Debugf("blob %s", layerBlob.Digest)
+		var candidates []types.BICReplacementCandidate
+		if baseImageRegistry != "" {
+			candidates = src.cache.CandidateLocations(docker.Transport, types.BICTransportScope{Opaque: baseImageRegistry}, layerBlob.Digest, true)
+		}
+		if len(candidates) == 0 && destRegistry != "" {
+			candidates = src.cache.CandidateLocations(docker.Transport, types.BICTransportScope{Opaque: destRegistry}, layerBlob.Digest, false)
+		}
+		if len(candidates) == 0 {
+			updatedBlobInfos = append(updatedBlobInfos, layerBlob)
+			continue
+		}
+		// We have a cached blob reference for this layer - that means
+		// we've pulled or pushed it before and there's no need to push
+		// it to cache.
+		src.logger.Debugf("stubbing layer %s", layerBlob.Digest)
+		updatedBlobInfos = append(updatedBlobInfos, types.BlobInfo{
+			Digest:    image.GzippedEmptyLayerDigest,
+			Size:      int64(len(image.GzippedEmptyLayer)),
+			MediaType: imgspecv1.MediaTypeImageLayerGzip,
+			Annotations: map[string]string{
+				diffIDAnnotation: layerBlob.Digest.String(),
+			},
+		})
+		changed = true
+	}
+	if changed {
+		return updatedBlobInfos, nil
+	}
+	return infos, nil
+}
+
+// GetBlob returns the gzipped empty layer when asked for a blob with that
+// layer's digest, which is what LayerInfosForCopy reports for stubbed layers;
+// any other blob is read from the wrapped source.
+func (src stubbedBlobsImageSource) GetBlob(ctx context.Context, info types.BlobInfo, infoCache types.BlobInfoCache) (io.ReadCloser, int64, error) {
+	if info.Digest == image.GzippedEmptyLayerDigest {
+		src.logger.Debugf("returning empty blob")
+		return io.NopCloser(bytes.NewReader(image.GzippedEmptyLayer)), int64(len(image.GzippedEmptyLayer)), nil
+	}
+	return src.ImageSource.GetBlob(ctx, info, infoCache)
+}