diff --git a/go.mod b/go.mod index 56cf30594..f0b2663f2 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect - github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.16.1 // indirect github.com/containerd/typeurl/v2 v2.2.0 // indirect github.com/coreos/go-oidc/v3 v3.11.0 // indirect github.com/cyphar/filepath-securejoin v0.3.4 // indirect @@ -104,7 +104,7 @@ require ( github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect - github.com/moby/sys/capability v0.3.0 // indirect + github.com/moby/sys/capability v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/term v0.5.0 // indirect @@ -147,3 +147,5 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gotest.tools/v3 v3.5.1 // indirect ) + +replace github.com/containers/storage => github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764 diff --git a/go.sum b/go.sum index 977864565..088da9d62 100644 --- a/go.sum +++ b/go.sum @@ -48,16 +48,14 @@ github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151X github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= -github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= +github.com/containerd/stargz-snapshotter/estargz v0.16.1 h1:7YswwU6746cJBN3p3l65JRk3+NZL7bap9Y6E3YeYowk= +github.com/containerd/stargz-snapshotter/estargz v0.16.1/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/containerd/typeurl/v2 v2.2.0 h1:6NBDbQzr7I5LHgp34xAXYF5DOTQDn05X58lsPEmzLso= github.com/containerd/typeurl/v2 v2.2.0/go.mod h1:8XOOxnyatxSWuG8OfsZXVnAF4iZfedjS/8UHSPJnX4g= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 h1:Qzk5C6cYglewc+UyGf6lc8Mj2UaPTHy/iF2De0/77CA= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01/go.mod h1:9rfv8iPl1ZP7aqh9YA68wnZv2NUDbXdcdPHVz0pFbPY= github.com/containers/ocicrypt v1.2.0 h1:X14EgRK3xNFvJEfI5O4Qn4T3E25ANudSOZz/sirVuPM= github.com/containers/ocicrypt v1.2.0/go.mod h1:ZNviigQajtdlxIZGibvblVuIFBKIuUI2M0QM12SD31U= -github.com/containers/storage v1.56.0 h1:DZ9KSkj6M2tvj/4bBoaJu3QDHRl35BwsZ4kmLJS97ZI= -github.com/containers/storage v1.56.0/go.mod h1:c6WKowcAlED/DkWGNuL9bvGYqIWCVy7isRMdCSKWNjk= github.com/coreos/go-oidc/v3 v3.11.0 h1:Ia3MxdwpSw702YW0xgfmP1GVCMA9aEFWu12XUZ3/OtI= github.com/coreos/go-oidc/v3 v3.11.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f h1:eHnXnuK47UlSTOQexbzxAZfekVz6i+LKRdj1CU5DPaM= @@ -233,8 +231,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/sys/capability v0.3.0 h1:kEP+y6te0gEXIaeQhIi0s7vKs/w0RPoH1qPa6jROcVg= -github.com/moby/sys/capability v0.3.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= +github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= +github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= @@ -250,6 +248,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764 h1:eKGr+Bfv/fKR3IAZFxVdgIHsXM7FjWRaGr3QaOrlszE= +github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764/go.mod h1:RPuMmpk0UGbu01hmshf3kHcnjGlLRn9Z/yF5hLuD76g= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= diff --git a/storage/storage_dest.go b/storage/storage_dest.go index bad11706b..6c99ccae4 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -343,6 +343,39 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read // If the call fails with ErrFallbackToOrdinaryLayerDownload, the caller can fall back to PutBlobWithOptions. // The fallback _must not_ be done otherwise. func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (_ private.UploadedBlob, retErr error) { + // The identity of partially-pulled layers is, as long as we keep compatibility with tar-like consumers, + // unfixably ambiguous: there are two possible “views” of the same file (same compressed digest), + // the traditional “view” that decompresses the primary stream and consumes a tar file, + // and the partial-pull “view” that starts with the TOC. + // The two “views” have two separate metadata sets and may refer to different parts of the blob for file contents; + // the direct way to ensure they are consistent would be to read the full primary stream (and authenticate it against + // the compressed digest), and ensure the metadata and layer contents exactly match the partially-pulled contents - + // making the partial pull completely pointless. + // + // Instead, we require the image to “commit” to uncompressed layer digest values via the config's RootFS.DiffIDs array: + // they are already naturally computed for traditionally-pulled layers, and for partially-pulled layers we + // do the optimal partial pull, and then reconstruct the uncompressed tar stream just to (expensively) compute this digest. + // + // So, if the input image doesn't have RootFS.DiffIDs (i.e. is schema1), don't bother with partial pulls at all, + // and always use the traditional “view”. + // (Hypothetically, the user can opt out of the DiffID commitment by a c/storage option, giving up security for performance, + // but schema1 images don't support layer annotations anyway, so neither zstd:chunked nor estargz layers can + // benefit from partial pulls anyway — so this is not giving up on partial pull functionality; OTOH it may + // interfere with composefs conversion, which currently only happens on the partial pull code path.) + untrustedDiffID, err := s.untrustedLayerDiffID(options.LayerIndex) + if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + // PutBlobPartial is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return private.UploadedBlob{}, fmt.Errorf("internal error: in PutBlobPartial, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + } + var e untrustedLayerDiffIDUnknownError + if errors.As(err, &e) { + return private.UploadedBlob{}, private.NewErrFallbackToOrdinaryLayerDownload(err) + } + return private.UploadedBlob{}, err + } + fetcher := zstdFetcher{ chunkAccessor: chunkAccessor, ctx: ctx, @@ -382,26 +415,35 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces if err := func() error { // A scope for defer defer s.lock.Unlock() + // For true partial pulls, c/storage decides whether to compute the uncompressed digest based on an option in storage.conf. + // c/storage can also be configured, on this code path, to consume the full blob (primarily to allow composefs conversion), + // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + // This is centrally enforced later, in commitLayer, but because we have the value available, + // we might just as well check immediately. + if out.UncompressedDigest != untrustedDiffID { + return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), + out.UncompressedDigest.String(), untrustedDiffID.String()) + } + s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest if out.TOCDigest != "" { + s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest options.Cache.RecordTOCUncompressedPair(out.TOCDigest, out.UncompressedDigest) } - // Don’t set indexToTOCDigest on this path: - // - Using UncompressedDigest allows image reuse with non-partially-pulled layers, so we want to set indexToDiffID. - // - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch. - // That TOC is quite unlikely to match any other TOC value. - // The computation of UncompressedDigest means the whole layer has been consumed; while doing that, chunked.GetDiffer is + // If the whole layer has been consumed, chunked.GetDiffer is // responsible for ensuring blobDigest has been validated. - if out.CompressedDigest != blobDigest { - return fmt.Errorf("internal error: PrepareStagedLayer returned CompressedDigest %q not matching expected %q", - out.CompressedDigest, blobDigest) + if out.CompressedDigest != "" { + if out.CompressedDigest != blobDigest { + return fmt.Errorf("internal error: PrepareStagedLayer returned CompressedDigest %q not matching expected %q", + out.CompressedDigest, blobDigest) + } + // So, record also information about blobDigest, that might benefit reuse. + // We trust PrepareStagedLayer to validate or create both values correctly. + s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest + options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } - // So, record also information about blobDigest, that might benefit reuse. - // We trust PrepareStagedLayer to validate or create both values correctly. - s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest - options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } else { // Use diffID for layer identity if it is known. if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" { @@ -609,6 +651,13 @@ type trustedLayerIdentityData struct { blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted. } +// logString() prints a representation of trusted suitable identifying a layer in logs and errors. +// The string is already quoted to expose malicious input and does not need to be quoted again. +// Note that it does not include _all_ of the contents. +func (trusted trustedLayerIdentityData) logString() string { + return fmt.Sprintf("%q/%q/%q", trusted.blobDigest, trusted.tocDigest, trusted.diffID) +} + // trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest). // blobDigest is the (possibly-compressed) layer digest referenced in the manifest. // It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available. @@ -819,23 +868,6 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) return nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -// It returns ("", false) if the layer is not found at all (which should never happen) -func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) { - s.lock.Lock() - defer s.lock.Unlock() - - trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest) - if !ok { - return "", false - } - if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. - } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. -} - // commitLayer commits the specified layer with the given index to the storage. // size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs. // @@ -847,16 +879,15 @@ func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDig // must guarantee that, at any given time, at most one goroutine may execute // `commitLayer()`. func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) { - // Already committed? Return early. if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted { return false, nil } - // Start with an empty string or the previous layer ID. Note that - // `s.indexToStorageID` can only be accessed by *one* goroutine at any - // given time. Hence, we don't need to lock accesses. - var parentLayer string + var parentLayer string // "" if no parent if index != 0 { + // s.indexToStorageID can only be written by this function, and the our caller + // is responsible for ensuring it can be only be called by *one* goroutine at any + // given time. Hence, we don't need to lock accesses. prev, ok := s.indexToStorageID[index-1] if !ok { return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1) @@ -864,18 +895,17 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si parentLayer = prev } - // Carry over the previous ID for empty non-base layers. if info.emptyLayer { s.indexToStorageID[index] = parentLayer return false, nil } - // Check if there's already a layer with the ID that we'd give to the result of applying - // this layer blob to its parent, if it has one, or the blob's hex value otherwise. - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { - // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … + // Collect trusted parameters of the layer. + s.lock.Lock() + trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { + // Check if the layer exists already and the caller just (incorrectly) forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller // that relies on using a blob digest that has never been seen by the store had better call @@ -899,23 +929,47 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String()) } - layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok = s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - id := layerIDComponent - if !layerIDComponentStandalone || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() + // Ensure that we always see the same “view” of a layer, as identified by the layer’s uncompressed digest, + // unless the user has explicitly opted out of this in storage.conf: see the more detailed explanation in PutBlobPartial. + if trusted.diffID != "" { + untrustedDiffID, err := s.untrustedLayerDiffID(index) + if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + logrus.Debugf("Skipping commit for layer %q, manifest not yet available for DiffID check", index) + return true, nil + } + var e untrustedLayerDiffIDUnknownError + if !errors.As(err, &e) { + return false, err + } + // If untrustedLayerDiffIDUnknownError, the input image is schema1 and we should not have done a partial pull or a TOC-based match. + // FIXME: review tryReusingBlob… and PutBlobPartial to verify + if trusted.layerIdentifiedByTOC { + return false, fmt.Errorf("internal error: layer %d for blob %s was identified by TOC, but we don't have a DiffID in config", + index, trusted.logString()) + } + } else if trusted.diffID != untrustedDiffID { + return false, fmt.Errorf("layer %d (blob %s) does not match config's DiffID %q", index, trusted.logString(), untrustedDiffID) + } } + + id := layerID(parentLayer, trusted) + if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { // There's already a layer that should have the right contents, just reuse it. s.indexToStorageID[index] = layer.ID return false, nil } - layer, err := s.createNewLayer(index, info.digest, parentLayer, id) + layer, err := s.createNewLayer(index, trusted, parentLayer, id) if err != nil { return false, err } @@ -926,33 +980,58 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). +// layerID computes a layer (“chain”) ID for (a possibly-empty parentID, trusted) +func layerID(parentID string, trusted trustedLayerIdentityData) string { + var component string + mustHash := false + if trusted.layerIdentifiedByTOC { + // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. + // But we _must_ hash this below to get a Digest.Encoded()-formatted value. + component = "@TOC=" + trusted.tocDigest.Encoded() + mustHash = true + } else { + component = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + } + + if parentID == "" && !mustHash { + return component + } + return digest.Canonical.FromString(parentID + "+" + component).Encoded() +} + +// createNewLayer creates a new layer newLayerID for (index, trusted) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). -func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { +func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayerIdentityData, parentLayer, newLayerID string) (*storage.Layer, error) { s.lock.Lock() diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() if ok { - // If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput. + // Typically, we compute a trusted DiffID value to authenticate the layer contents, see the detailed explanation + // in PutBlobPartial. If the user has opted out of that, but we know a trusted DiffID value + // (e.g. from a BlobInfoCache), set it in diffOutput. // That way it will be persisted in storage even if the cache is deleted; also - // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably - // the costly commit delay until a manifest is available). - s.lock.Lock() - if d, ok := s.lockProtected.indexToDiffID[index]; ok { - diffOutput.UncompressedDigest = d + // we can use the value below to avoid the untrustedUncompressedDigest logic. + if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { + diffOutput.UncompressedDigest = trusted.diffID } - s.lock.Unlock() var untrustedUncompressedDigest digest.Digest if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) + return nil, nil + } + var e untrustedLayerDiffIDUnknownError + if errors.As(err, &e) { + // If untrustedLayerDiffIDUnknownError, the input image is schema1 and we should not have done a partial pull or a TOC-based match. + // FIXME: review tryReusingBlob… and PutBlobPartial to verify + return nil, fmt.Errorf("internal error: layer %d for blob %s was partially-pulled, but we don't have a DiffID in config", + index, trusted.logString()) + } return nil, err } - if d == "" { - logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) - return nil, nil - } untrustedUncompressedDigest = d // While the contents of the digest are untrusted, make sure at least the _format_ is valid, @@ -999,14 +1078,10 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // then we need to read the desired contents from a layer. var filename string var gotFilename bool - s.lock.Lock() - trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest) - if ok && trusted.blobDigest != "" { + if trusted.blobDigest != "" { + s.lock.Lock() filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest] - } - s.lock.Unlock() - if !ok { // We have already determined newLayerID, so the data must have been available. - return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest) + s.lock.Unlock() } var trustedOriginalDigest digest.Digest // For storage.LayerOptions var trustedOriginalSize *int64 @@ -1033,7 +1108,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } } if layer == nil { - return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID) + return nil, fmt.Errorf("layer for blob %s not found", trusted.logString()) } // Read the layer's contents. @@ -1043,7 +1118,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions) if err2 != nil { - return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2) + return nil, fmt.Errorf("reading layer %q for blob %s: %w", layer.ID, trusted.logString(), err2) } // Copy the layer diff to a file. Diff() takes a lock that it holds // until the ReadCloser that it returns is closed, and PutLayer() wants @@ -1122,7 +1197,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { - return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err) + return nil, fmt.Errorf("adding layer with blob %s: %w", trusted.logString(), err) } return layer, nil } @@ -1172,9 +1247,29 @@ func (u *uncommittedImageSource) GetBlob(ctx context.Context, info types.BlobInf return io.NopCloser(bytes.NewReader(blob)), int64(len(blob)), nil } +// errUntrustedLayerDiffIDNotYetAvailable is returned by untrustedLayerDiffID +// if the value is not yet available (but it can be available after s.manifests is set). +// This should only happen for external callers of the transport, not for c/image/copy. +// +// Callers of untrustedLayerDiffID before PutManifest must handle this error specially; +// callers after PutManifest can use the default, reporting an internal error. +var errUntrustedLayerDiffIDNotYetAvailable = errors.New("internal error: untrustedLayerDiffID has no value available and fallback was not implemented") + +// untrustedLayerDiffIDUnknownError is returned by untrustedLayerDiffID +// if the image’s format does not provide DiffIDs. +type untrustedLayerDiffIDUnknownError struct { + layerIndex int +} + +func (e untrustedLayerDiffIDUnknownError) Error() string { + return fmt.Sprintf("DiffID value for layer %d is unknown or explicitly empty", e.layerIndex) +} + // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. -// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil). -// WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. +// It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. +// +// WARNING: This function does not even validate that the returned digest has a valid format. +// WARNING: We don’t _always_ validate this DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, // nothing is writing to s.manifest yet, and s.untrustedDiffIDValues might have been set @@ -1187,7 +1282,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // via NoteOriginalOCIConfig; this is a compatibility fallback for external callers // of the public types.ImageDestination. if s.manifest == nil { - return "", nil + return "", errUntrustedLayerDiffIDNotYetAvailable } ctx := context.Background() // This is all happening in memory, no need to worry about cancellation. @@ -1209,17 +1304,12 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D } res := s.untrustedDiffIDValues[layerIndex] if res == "" { - // In practice, this should, right now, only matter for pulls of OCI images - // (this code path implies that we did a partial pull because a layer has an annotation), - // So, DiffIDs should always be set. - // - // It is, though, reachable by pulling an OCI image while converting to schema1, - // using a manual (skopeo copy) or something similar, not (podman pull). + // schema1 images don’t have DiffID values in the config. // // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values. - // The current semantics of this function are that ("", nil) means "try again later", - // which is not what we want to happen; for now, turn that into an explicit error. - return "", fmt.Errorf("DiffID value for layer %d is unknown or explicitly empty", layerIndex) + return "", untrustedLayerDiffIDUnknownError{ + layerIndex: layerIndex, + } } return res, nil } diff --git a/storage/storage_test.go b/storage/storage_test.go index 6f90495fd..870688933 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -9,6 +9,7 @@ import ( "context" "crypto/rand" "crypto/sha256" + "encoding/json" "errors" "flag" "fmt" @@ -187,21 +188,25 @@ func TestParseWithGraphDriverOptions(t *testing.T) { } } -// makeLayerGoroutine writes to pwriter, and on success, updates uncompressedCount +// makeLayerGoroutine writes to dest, and on success, updates uncompressedCount and uncompressedDigest // before it terminates. -func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) error { - var uncompressed *ioutils.WriteCounter +func makeLayerGoroutine(dest io.Writer, uncompressedCount *int64, uncompressedDigest *digest.Digest, compression archive.Compression) (retErr error) { if compression != archive.Uncompressed { - compressor, err := archive.CompressStream(pwriter, compression) + compressor, err := archive.CompressStream(dest, compression) if err != nil { return fmt.Errorf("compressing layer: %w", err) } - defer compressor.Close() - uncompressed = ioutils.NewWriteCounter(compressor) - } else { - uncompressed = ioutils.NewWriteCounter(pwriter) + defer func() { + if err := compressor.Close(); err != nil && retErr == nil { + retErr = err + } + }() + dest = compressor } - twriter := tar.NewWriter(uncompressed) + + uncompressedCounter := ioutils.NewWriteCounter(dest) + uncompressedDigester := digest.Canonical.Digester() + twriter := tar.NewWriter(io.MultiWriter(uncompressedCounter, uncompressedDigester.Hash())) // defer twriter.Close() // should be called here to correctly terminate the archive. // We do not do that, to workaround https://github.com/containers/storage/issues/1729 : @@ -244,36 +249,40 @@ func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression if err := twriter.Flush(); err != nil { return fmt.Errorf("Error flushing output to tar archive: %w", err) } - *uncompressedCount = uncompressed.Count + *uncompressedCount = uncompressedCounter.Count + *uncompressedDigest = uncompressedDigester.Digest() return nil } type testBlob struct { - compressedDigest digest.Digest - uncompressedSize int64 - compressedSize int64 - data []byte + uncompressedDigest digest.Digest + compressedDigest digest.Digest + uncompressedSize int64 + compressedSize int64 + data []byte } func makeLayer(t *testing.T, compression archive.Compression) testBlob { preader, pwriter := io.Pipe() var uncompressedCount int64 + var uncompressedDigest digest.Digest go func() { err := errors.New("Internal error: unexpected panic in makeLayer") defer func() { // Note that this is not the same as {defer pipeWriter.CloseWithError(err)}; we need err to be evaluated lazily. _ = pwriter.CloseWithError(err) }() - err = makeLayerGoroutine(pwriter, &uncompressedCount, compression) + err = makeLayerGoroutine(pwriter, &uncompressedCount, &uncompressedDigest, compression) }() tbuffer := bytes.Buffer{} _, err := io.Copy(&tbuffer, preader) require.NoError(t, err) return testBlob{ - compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), - uncompressedSize: uncompressedCount, - compressedSize: int64(tbuffer.Len()), - data: tbuffer.Bytes(), + uncompressedDigest: uncompressedDigest, + compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), + uncompressedSize: uncompressedCount, + compressedSize: int64(tbuffer.Len()), + data: tbuffer.Bytes(), } } @@ -305,6 +314,40 @@ func ensureTestCanCreateImages(t *testing.T) { } } +// configForLayers returns a minimally-plausible config for layers +func configForLayers(t *testing.T, layers []testBlob) testBlob { + rootFS := manifest.Schema2RootFS{ + Type: "layers", + DiffIDs: []digest.Digest{}, + } + for _, l := range layers { + rootFS.DiffIDs = append(rootFS.DiffIDs, l.uncompressedDigest) + } + // Add a unique label so that different calls to configForLayers don’t try to use the same image ID. + randomBytes := make([]byte, digest.Canonical.Size()) + _, err := rand.Read(randomBytes) + require.NoError(t, err) + config := manifest.Schema2Image{ + Schema2V1Image: manifest.Schema2V1Image{ + Config: &manifest.Schema2Config{ + Labels: map[string]string{"unique": fmt.Sprintf("%x", randomBytes)}, + }, + Created: time.Now(), + }, + RootFS: &rootFS, + } + configBytes, err := json.Marshal(config) + require.NoError(t, err) + configDigest := digest.Canonical.FromBytes(configBytes) + return testBlob{ + uncompressedDigest: configDigest, + compressedDigest: configDigest, + uncompressedSize: int64(len(configBytes)), + compressedSize: int64(len(configBytes)), + data: configBytes, + } +} + func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache types.BlobInfoCache, layers []testBlob, config *testBlob) (types.ImageDestination, types.UnparsedImage) { dest, err := ref.NewImageDestination(context.Background(), nil) @@ -316,21 +359,11 @@ func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache ty layerDescriptors = append(layerDescriptors, desc) } - var configDescriptor manifest.Schema2Descriptor - if config != nil { - configDescriptor = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) - } else { - // Use a random digest so that different calls to createUncommittedImageDest with config == nil don’t try to - // use the same image ID. - digestBytes := make([]byte, digest.Canonical.Size()) - _, err := rand.Read(digestBytes) - require.NoError(t, err) - configDescriptor = manifest.Schema2Descriptor{ - MediaType: manifest.DockerV2Schema2ConfigMediaType, - Size: 1, - Digest: digest.NewDigestFromBytes(digest.Canonical, digestBytes), - } + if config == nil { + cd := configForLayers(t, layers) + config = &cd } + configDescriptor := config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := manifest.Schema2FromComponents(configDescriptor, layerDescriptors) manifestBytes, err := manifest.Serialize() @@ -358,14 +391,6 @@ func createImage(t *testing.T, ref types.ImageReference, cache types.BlobInfoCac func TestWriteRead(t *testing.T) { ensureTestCanCreateImages(t) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - manifests := []string{ //`{ // "schemaVersion": 2, @@ -443,6 +468,7 @@ func TestWriteRead(t *testing.T) { layer := makeLayer(t, compression) _ = layer.storeBlob(t, dest, cache, manifest.DockerV2Schema2LayerMediaType, false) t.Logf("Wrote randomly-generated layer %q (%d/%d bytes) to destination", layer.compressedDigest, layer.compressedSize, layer.uncompressedSize) + config := configForLayers(t, []testBlob{layer}) _ = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := strings.ReplaceAll(manifestFmt, "%lh", layer.compressedDigest.String()) @@ -635,18 +661,13 @@ func TestSize(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } + layerBlobs := []testBlob{layer1, layer2} + config := configForLayers(t, layerBlobs) ref, err := Transport.ParseReference("test") require.NoError(t, err) - createImage(t, ref, cache, []testBlob{layer1, layer2}, &config) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err) @@ -673,15 +694,9 @@ func TestDuplicateBlob(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - - createImage(t, ref, cache, []testBlob{layer1, layer2, layer1, layer2}, &config) + layerBlobs := []testBlob{layer1, layer2, layer1, layer2} + config := configForLayers(t, layerBlobs) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err)