From 53f794cca608ee5bc007fc3273e8468338135a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 30 Oct 2024 15:57:51 +0100 Subject: [PATCH 01/14] DO NOT MERGE: Use https://github.com/containers/storage/pull/2155 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- go.mod | 6 ++++-- go.sum | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 56cf30594..f0b2663f2 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect - github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.16.1 // indirect github.com/containerd/typeurl/v2 v2.2.0 // indirect github.com/coreos/go-oidc/v3 v3.11.0 // indirect github.com/cyphar/filepath-securejoin v0.3.4 // indirect @@ -104,7 +104,7 @@ require ( github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect - github.com/moby/sys/capability v0.3.0 // indirect + github.com/moby/sys/capability v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/term v0.5.0 // indirect @@ -147,3 +147,5 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gotest.tools/v3 v3.5.1 // indirect ) + +replace github.com/containers/storage => github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764 diff --git a/go.sum b/go.sum index 977864565..088da9d62 100644 --- a/go.sum +++ b/go.sum @@ -48,16 +48,14 @@ github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151X github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= -github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= +github.com/containerd/stargz-snapshotter/estargz v0.16.1 h1:7YswwU6746cJBN3p3l65JRk3+NZL7bap9Y6E3YeYowk= +github.com/containerd/stargz-snapshotter/estargz v0.16.1/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/containerd/typeurl/v2 v2.2.0 h1:6NBDbQzr7I5LHgp34xAXYF5DOTQDn05X58lsPEmzLso= github.com/containerd/typeurl/v2 v2.2.0/go.mod h1:8XOOxnyatxSWuG8OfsZXVnAF4iZfedjS/8UHSPJnX4g= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 h1:Qzk5C6cYglewc+UyGf6lc8Mj2UaPTHy/iF2De0/77CA= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01/go.mod h1:9rfv8iPl1ZP7aqh9YA68wnZv2NUDbXdcdPHVz0pFbPY= github.com/containers/ocicrypt v1.2.0 h1:X14EgRK3xNFvJEfI5O4Qn4T3E25ANudSOZz/sirVuPM= github.com/containers/ocicrypt v1.2.0/go.mod h1:ZNviigQajtdlxIZGibvblVuIFBKIuUI2M0QM12SD31U= -github.com/containers/storage v1.56.0 h1:DZ9KSkj6M2tvj/4bBoaJu3QDHRl35BwsZ4kmLJS97ZI= -github.com/containers/storage v1.56.0/go.mod h1:c6WKowcAlED/DkWGNuL9bvGYqIWCVy7isRMdCSKWNjk= github.com/coreos/go-oidc/v3 v3.11.0 h1:Ia3MxdwpSw702YW0xgfmP1GVCMA9aEFWu12XUZ3/OtI= github.com/coreos/go-oidc/v3 v3.11.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f h1:eHnXnuK47UlSTOQexbzxAZfekVz6i+LKRdj1CU5DPaM= @@ -233,8 +231,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/sys/capability v0.3.0 h1:kEP+y6te0gEXIaeQhIi0s7vKs/w0RPoH1qPa6jROcVg= -github.com/moby/sys/capability v0.3.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= +github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= +github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= @@ -250,6 +248,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764 h1:eKGr+Bfv/fKR3IAZFxVdgIHsXM7FjWRaGr3QaOrlszE= +github.com/mtrmac/storage v0.0.0-20241115211512-4c9e6d846764/go.mod h1:RPuMmpk0UGbu01hmshf3kHcnjGlLRn9Z/yF5hLuD76g= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= From 27ac95eab06596252b02a014a3fd7c307e2419a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 28 Nov 2024 20:05:59 +0100 Subject: [PATCH 02/14] Handle a failure when closing compressor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- storage/storage_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/storage_test.go b/storage/storage_test.go index 6f90495fd..fc7e63e99 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -189,14 +189,18 @@ func TestParseWithGraphDriverOptions(t *testing.T) { // makeLayerGoroutine writes to pwriter, and on success, updates uncompressedCount // before it terminates. -func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) error { +func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) (retErr error) { var uncompressed *ioutils.WriteCounter if compression != archive.Uncompressed { compressor, err := archive.CompressStream(pwriter, compression) if err != nil { return fmt.Errorf("compressing layer: %w", err) } - defer compressor.Close() + defer func() { + if err := compressor.Close(); err != nil && retErr == nil { + retErr = err + } + }() uncompressed = ioutils.NewWriteCounter(compressor) } else { uncompressed = ioutils.NewWriteCounter(pwriter) From 884edfcac1751a85363bb3d08ad07068ca4cfc3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 28 Nov 2024 20:26:10 +0100 Subject: [PATCH 03/14] Use more representative configs in storage tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because we will start enforcing that the DiffID values match. Signed-off-by: Miloslav Trmač --- storage/storage_test.go | 125 ++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 57 deletions(-) diff --git a/storage/storage_test.go b/storage/storage_test.go index fc7e63e99..870688933 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -9,6 +9,7 @@ import ( "context" "crypto/rand" "crypto/sha256" + "encoding/json" "errors" "flag" "fmt" @@ -187,12 +188,11 @@ func TestParseWithGraphDriverOptions(t *testing.T) { } } -// makeLayerGoroutine writes to pwriter, and on success, updates uncompressedCount +// makeLayerGoroutine writes to dest, and on success, updates uncompressedCount and uncompressedDigest // before it terminates. -func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) (retErr error) { - var uncompressed *ioutils.WriteCounter +func makeLayerGoroutine(dest io.Writer, uncompressedCount *int64, uncompressedDigest *digest.Digest, compression archive.Compression) (retErr error) { if compression != archive.Uncompressed { - compressor, err := archive.CompressStream(pwriter, compression) + compressor, err := archive.CompressStream(dest, compression) if err != nil { return fmt.Errorf("compressing layer: %w", err) } @@ -201,11 +201,12 @@ func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression retErr = err } }() - uncompressed = ioutils.NewWriteCounter(compressor) - } else { - uncompressed = ioutils.NewWriteCounter(pwriter) + dest = compressor } - twriter := tar.NewWriter(uncompressed) + + uncompressedCounter := ioutils.NewWriteCounter(dest) + uncompressedDigester := digest.Canonical.Digester() + twriter := tar.NewWriter(io.MultiWriter(uncompressedCounter, uncompressedDigester.Hash())) // defer twriter.Close() // should be called here to correctly terminate the archive. // We do not do that, to workaround https://github.com/containers/storage/issues/1729 : @@ -248,36 +249,40 @@ func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression if err := twriter.Flush(); err != nil { return fmt.Errorf("Error flushing output to tar archive: %w", err) } - *uncompressedCount = uncompressed.Count + *uncompressedCount = uncompressedCounter.Count + *uncompressedDigest = uncompressedDigester.Digest() return nil } type testBlob struct { - compressedDigest digest.Digest - uncompressedSize int64 - compressedSize int64 - data []byte + uncompressedDigest digest.Digest + compressedDigest digest.Digest + uncompressedSize int64 + compressedSize int64 + data []byte } func makeLayer(t *testing.T, compression archive.Compression) testBlob { preader, pwriter := io.Pipe() var uncompressedCount int64 + var uncompressedDigest digest.Digest go func() { err := errors.New("Internal error: unexpected panic in makeLayer") defer func() { // Note that this is not the same as {defer pipeWriter.CloseWithError(err)}; we need err to be evaluated lazily. _ = pwriter.CloseWithError(err) }() - err = makeLayerGoroutine(pwriter, &uncompressedCount, compression) + err = makeLayerGoroutine(pwriter, &uncompressedCount, &uncompressedDigest, compression) }() tbuffer := bytes.Buffer{} _, err := io.Copy(&tbuffer, preader) require.NoError(t, err) return testBlob{ - compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), - uncompressedSize: uncompressedCount, - compressedSize: int64(tbuffer.Len()), - data: tbuffer.Bytes(), + uncompressedDigest: uncompressedDigest, + compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), + uncompressedSize: uncompressedCount, + compressedSize: int64(tbuffer.Len()), + data: tbuffer.Bytes(), } } @@ -309,6 +314,40 @@ func ensureTestCanCreateImages(t *testing.T) { } } +// configForLayers returns a minimally-plausible config for layers +func configForLayers(t *testing.T, layers []testBlob) testBlob { + rootFS := manifest.Schema2RootFS{ + Type: "layers", + DiffIDs: []digest.Digest{}, + } + for _, l := range layers { + rootFS.DiffIDs = append(rootFS.DiffIDs, l.uncompressedDigest) + } + // Add a unique label so that different calls to configForLayers don’t try to use the same image ID. + randomBytes := make([]byte, digest.Canonical.Size()) + _, err := rand.Read(randomBytes) + require.NoError(t, err) + config := manifest.Schema2Image{ + Schema2V1Image: manifest.Schema2V1Image{ + Config: &manifest.Schema2Config{ + Labels: map[string]string{"unique": fmt.Sprintf("%x", randomBytes)}, + }, + Created: time.Now(), + }, + RootFS: &rootFS, + } + configBytes, err := json.Marshal(config) + require.NoError(t, err) + configDigest := digest.Canonical.FromBytes(configBytes) + return testBlob{ + uncompressedDigest: configDigest, + compressedDigest: configDigest, + uncompressedSize: int64(len(configBytes)), + compressedSize: int64(len(configBytes)), + data: configBytes, + } +} + func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache types.BlobInfoCache, layers []testBlob, config *testBlob) (types.ImageDestination, types.UnparsedImage) { dest, err := ref.NewImageDestination(context.Background(), nil) @@ -320,21 +359,11 @@ func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache ty layerDescriptors = append(layerDescriptors, desc) } - var configDescriptor manifest.Schema2Descriptor - if config != nil { - configDescriptor = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) - } else { - // Use a random digest so that different calls to createUncommittedImageDest with config == nil don’t try to - // use the same image ID. - digestBytes := make([]byte, digest.Canonical.Size()) - _, err := rand.Read(digestBytes) - require.NoError(t, err) - configDescriptor = manifest.Schema2Descriptor{ - MediaType: manifest.DockerV2Schema2ConfigMediaType, - Size: 1, - Digest: digest.NewDigestFromBytes(digest.Canonical, digestBytes), - } + if config == nil { + cd := configForLayers(t, layers) + config = &cd } + configDescriptor := config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := manifest.Schema2FromComponents(configDescriptor, layerDescriptors) manifestBytes, err := manifest.Serialize() @@ -362,14 +391,6 @@ func createImage(t *testing.T, ref types.ImageReference, cache types.BlobInfoCac func TestWriteRead(t *testing.T) { ensureTestCanCreateImages(t) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - manifests := []string{ //`{ // "schemaVersion": 2, @@ -447,6 +468,7 @@ func TestWriteRead(t *testing.T) { layer := makeLayer(t, compression) _ = layer.storeBlob(t, dest, cache, manifest.DockerV2Schema2LayerMediaType, false) t.Logf("Wrote randomly-generated layer %q (%d/%d bytes) to destination", layer.compressedDigest, layer.compressedSize, layer.uncompressedSize) + config := configForLayers(t, []testBlob{layer}) _ = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := strings.ReplaceAll(manifestFmt, "%lh", layer.compressedDigest.String()) @@ -639,18 +661,13 @@ func TestSize(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } + layerBlobs := []testBlob{layer1, layer2} + config := configForLayers(t, layerBlobs) ref, err := Transport.ParseReference("test") require.NoError(t, err) - createImage(t, ref, cache, []testBlob{layer1, layer2}, &config) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err) @@ -677,15 +694,9 @@ func TestDuplicateBlob(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - - createImage(t, ref, cache, []testBlob{layer1, layer2, layer1, layer2}, &config) + layerBlobs := []testBlob{layer1, layer2, layer1, layer2} + config := configForLayers(t, layerBlobs) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err) From 448a179ffc03ebaaaec67acd76c7c50c7491c2dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 22:22:03 +0100 Subject: [PATCH 04/14] Move computing trustedLayerIdentityData out of singleLayerIDComponent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will use the trustedLayerIdentityData for other purposes in the caller as well. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index bad11706b..6a7aed1fe 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -821,15 +821,7 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) // singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, // and an indication whether the input already has the shape of a layer ID. -// It returns ("", false) if the layer is not found at all (which should never happen) -func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) { - s.lock.Lock() - defer s.lock.Unlock() - - trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest) - if !ok { - return "", false - } +func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { if trusted.layerIdentifiedByTOC { return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. } @@ -872,9 +864,10 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si // Check if there's already a layer with the ID that we'd give to the result of applying // this layer blob to its parent, if it has one, or the blob's hex value otherwise. - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller @@ -899,12 +892,15 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String()) } - layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok = s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - + // The layerID refers either to the DiffID or the digest of the TOC. + layerIDComponent, layerIDComponentStandalone := trusted.singleLayerIDComponent() id := layerIDComponent if !layerIDComponentStandalone || parentLayer != "" { id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() From 735bb6c128a30d43a696d09fdc055594ec8b36a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 29 Nov 2024 18:37:07 +0100 Subject: [PATCH 05/14] Move singleLayerIDComponent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the commit queuing logic together, this is more of an implementation detail of commitLayer. Only moves unchanged code, should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 6a7aed1fe..5ed4c3971 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -819,15 +819,6 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) return nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { - if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. - } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. -} - // commitLayer commits the specified layer with the given index to the storage. // size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs. // @@ -922,6 +913,15 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } +// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, +// and an indication whether the input already has the shape of a layer ID. +func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { + if trusted.layerIdentifiedByTOC { + return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. + } + return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. +} + // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { From d191fa9edff9f353a499616443c6d0f3c57b2607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 22:29:04 +0100 Subject: [PATCH 06/14] FIXME: Split layerID from commitLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's fairly isolated from the rest of the function, and if split, it can have unit tests. Those tests are valuable to ensure that layer IDs continue tob ehave the expected way and maximize layer reuse (although we are not making an API commitment to layer ID values). FIXME: Add unit tests Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 5ed4c3971..4afbe781c 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -890,12 +890,9 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := trusted.singleLayerIDComponent() - id := layerIDComponent - if !layerIDComponentStandalone || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() - } + + id := layerID(parentLayer, trusted) + if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { // There's already a layer that should have the right contents, just reuse it. s.indexToStorageID[index] = layer.ID @@ -913,13 +910,21 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { +// layerID computes a layer (“chain”) ID for (a possibly-empty parentLayer, trusted) +func layerID(parentLayer string, trusted trustedLayerIdentityData) string { + var layerIDComponent string if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. + // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. + // But we _must_ hash this below to get a Digest.Encoded()-formatted value. + layerIDComponent = "@TOC=" + trusted.tocDigest.Encoded() + } else { + layerIDComponent = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + } + id := layerIDComponent + if trusted.layerIdentifiedByTOC || parentLayer != "" { + id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. + return id } // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). From ea7b0ef2c7a91010a38f4d2641d4f7b6144db1fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 29 Nov 2024 01:27:49 +0100 Subject: [PATCH 07/14] Beautify the implementation of layerID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 4afbe781c..f547a1cd0 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -910,21 +910,23 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// layerID computes a layer (“chain”) ID for (a possibly-empty parentLayer, trusted) -func layerID(parentLayer string, trusted trustedLayerIdentityData) string { - var layerIDComponent string +// layerID computes a layer (“chain”) ID for (a possibly-empty parentID, trusted) +func layerID(parentID string, trusted trustedLayerIdentityData) string { + var component string + mustHash := false if trusted.layerIdentifiedByTOC { // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. // But we _must_ hash this below to get a Digest.Encoded()-formatted value. - layerIDComponent = "@TOC=" + trusted.tocDigest.Encoded() + component = "@TOC=" + trusted.tocDigest.Encoded() + mustHash = true } else { - layerIDComponent = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + component = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. } - id := layerIDComponent - if trusted.layerIdentifiedByTOC || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() + + if parentID == "" && !mustHash { + return component } - return id + return digest.Canonical.FromString(parentID + "+" + component).Encoded() } // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). From 21a5184c62551fbd8bb7e25e5ac7c364275bae53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:49:35 +0100 Subject: [PATCH 08/14] Introduce trustedLayerIdentityData.logString() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... to simplify some of the repetitive logging code. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index f547a1cd0..ee7236892 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -609,6 +609,13 @@ type trustedLayerIdentityData struct { blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted. } +// logString() prints a representation of trusted suitable identifying a layer in logs and errors. +// The string is already quoted to expose malicious input and does not need to be quoted again. +// Note that it does not include _all_ of the contents. +func (trusted trustedLayerIdentityData) logString() string { + return fmt.Sprintf("%q/%q/%q", trusted.blobDigest, trusted.tocDigest, trusted.diffID) +} + // trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest). // blobDigest is the (possibly-compressed) layer digest referenced in the manifest. // It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available. @@ -1036,7 +1043,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } } if layer == nil { - return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID) + return nil, fmt.Errorf("layer for blob %s not found", trusted.logString()) } // Read the layer's contents. @@ -1046,7 +1053,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions) if err2 != nil { - return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2) + return nil, fmt.Errorf("reading layer %q for blob %s: %w", layer.ID, trusted.logString(), err2) } // Copy the layer diff to a file. Diff() takes a lock that it holds // until the ReadCloser that it returns is closed, and PutLayer() wants @@ -1125,7 +1132,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { - return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err) + return nil, fmt.Errorf("adding layer with blob %s: %w", trusted.logString(), err) } return layer, nil } From 733356456f743dc1b43f7b4625cdfefac73f8a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 20:43:20 +0100 Subject: [PATCH 09/14] Return sentinel errors from untrustedLayerDiffID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit untrustedLayerDiffID currently specializes the "not available yet" case; also specialize the "image does not provide this at all" case, which we will need to handle. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index ee7236892..1a2018727 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -957,12 +957,12 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) + return nil, nil + } return nil, err } - if d == "" { - logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) - return nil, nil - } untrustedUncompressedDigest = d // While the contents of the digest are untrusted, make sure at least the _format_ is valid, @@ -1182,8 +1182,27 @@ func (u *uncommittedImageSource) GetBlob(ctx context.Context, info types.BlobInf return io.NopCloser(bytes.NewReader(blob)), int64(len(blob)), nil } +// errUntrustedLayerDiffIDNotYetAvailable is returned by untrustedLayerDiffID +// if the value is not yet available (but it can be available after s.manifests is set). +// This should only happen for external callers of the transport, not for c/image/copy. +// +// Callers of untrustedLayerDiffID before PutManifest must handle this error specially; +// callers after PutManifest can use the default, reporting an internal error. +var errUntrustedLayerDiffIDNotYetAvailable = errors.New("internal error: untrustedLayerDiffID has no value available and fallback was not implemented") + +// untrustedLayerDiffIDUnknownError is returned by untrustedLayerDiffID +// if the image’s format does not provide DiffIDs. +type untrustedLayerDiffIDUnknownError struct { + layerIndex int +} + +func (e untrustedLayerDiffIDUnknownError) Error() string { + return fmt.Sprintf("DiffID value for layer %d is unknown or explicitly empty", e.layerIndex) +} + // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. -// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil). +// It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. +// // WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, @@ -1197,7 +1216,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // via NoteOriginalOCIConfig; this is a compatibility fallback for external callers // of the public types.ImageDestination. if s.manifest == nil { - return "", nil + return "", errUntrustedLayerDiffIDNotYetAvailable } ctx := context.Background() // This is all happening in memory, no need to worry about cancellation. @@ -1227,9 +1246,9 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // using a manual (skopeo copy) or something similar, not (podman pull). // // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values. - // The current semantics of this function are that ("", nil) means "try again later", - // which is not what we want to happen; for now, turn that into an explicit error. - return "", fmt.Errorf("DiffID value for layer %d is unknown or explicitly empty", layerIndex) + return "", untrustedLayerDiffIDUnknownError{ + layerIndex: layerIndex, + } } return res, nil } From 1a25930e627c532d0d88f717c40bf326c29532dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:58:05 +0100 Subject: [PATCH 10/14] Pass trustedLayerIdentityData into createNewLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two different locations in the function need the data, and the caller must have it available; so always passing it in simplifies the implementation and removes an impossible error path. This might hypothetically make layer reuse a bit worse, if we happened to learn something for trustedLayerIdentityData from processing other layers of the same image, but reusing the same layer twice within an image should be are. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 1a2018727..cd7c7deda 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -906,7 +906,7 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } - layer, err := s.createNewLayer(index, info.digest, parentLayer, id) + layer, err := s.createNewLayer(index, trusted, parentLayer, id) if err != nil { return false, err } @@ -936,9 +936,9 @@ func layerID(parentID string, trusted trustedLayerIdentityData) string { return digest.Canonical.FromString(parentID + "+" + component).Encoded() } -// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). +// createNewLayer creates a new layer newLayerID for (index, trusted) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). -func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { +func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayerIdentityData, parentLayer, newLayerID string) (*storage.Layer, error) { s.lock.Lock() diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() @@ -947,11 +947,9 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // That way it will be persisted in storage even if the cache is deleted; also // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably // the costly commit delay until a manifest is available). - s.lock.Lock() - if d, ok := s.lockProtected.indexToDiffID[index]; ok { - diffOutput.UncompressedDigest = d + if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { + diffOutput.UncompressedDigest = trusted.diffID } - s.lock.Unlock() var untrustedUncompressedDigest digest.Digest if diffOutput.UncompressedDigest == "" { @@ -1009,14 +1007,10 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // then we need to read the desired contents from a layer. var filename string var gotFilename bool - s.lock.Lock() - trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest) - if ok && trusted.blobDigest != "" { + if trusted.blobDigest != "" { + s.lock.Lock() filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest] - } - s.lock.Unlock() - if !ok { // We have already determined newLayerID, so the data must have been available. - return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest) + s.lock.Unlock() } var trustedOriginalDigest digest.Digest // For storage.LayerOptions var trustedOriginalSize *int64 From 6ce7ae2c12e5f27431d6870f879d711d4626e2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 21:36:18 +0100 Subject: [PATCH 11/14] WIP: Update for c/storage almost always computing UncompressedDigest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will want c/storage to do that in order to avoid the traditional/partial "view" ambiguity. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index cd7c7deda..1cb476410 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -385,23 +385,22 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces if out.UncompressedDigest != "" { s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest if out.TOCDigest != "" { + s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest options.Cache.RecordTOCUncompressedPair(out.TOCDigest, out.UncompressedDigest) } - // Don’t set indexToTOCDigest on this path: - // - Using UncompressedDigest allows image reuse with non-partially-pulled layers, so we want to set indexToDiffID. - // - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch. - // That TOC is quite unlikely to match any other TOC value. - // The computation of UncompressedDigest means the whole layer has been consumed; while doing that, chunked.GetDiffer is + // If the whole layer has been consumed, chunked.GetDiffer is // responsible for ensuring blobDigest has been validated. - if out.CompressedDigest != blobDigest { - return fmt.Errorf("internal error: PrepareStagedLayer returned CompressedDigest %q not matching expected %q", - out.CompressedDigest, blobDigest) + if out.CompressedDigest != "" { + if out.CompressedDigest != blobDigest { + return fmt.Errorf("internal error: PrepareStagedLayer returned CompressedDigest %q not matching expected %q", + out.CompressedDigest, blobDigest) + } + // So, record also information about blobDigest, that might benefit reuse. + // We trust PrepareStagedLayer to validate or create both values correctly. + s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest + options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } - // So, record also information about blobDigest, that might benefit reuse. - // We trust PrepareStagedLayer to validate or create both values correctly. - s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest - options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } else { // Use diffID for layer identity if it is known. if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" { From 608ac064400a2ae4fec7f4da5da64e60c86c8de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 21:17:52 +0100 Subject: [PATCH 12/14] WIP: Don't do partial pulls of images with unknown DiffID values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 1cb476410..72adf62d4 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -343,6 +343,39 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read // If the call fails with ErrFallbackToOrdinaryLayerDownload, the caller can fall back to PutBlobWithOptions. // The fallback _must not_ be done otherwise. func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (_ private.UploadedBlob, retErr error) { + // The identity of partially-pulled layers is, as long as we keep compatibility with tar-like consumers, + // unfixably ambiguous: there are two possible “views” of the same file (same compressed digest), + // the traditional “view” that decompresses the primary stream and consumes a tar file, + // and the partial-pull “view” that starts with the TOC. + // The two “views” have two separate metadata sets and may refer to different parts of the blob for file contents; + // the direct way to ensure they are consistent would be to read the full primary stream (and authenticate it against + // the compressed digest), and ensure the metadata and layer contents exactly match the partially-pulled contents - + // making the partial pull completely pointless. + // + // Instead, we require the image to “commit” to uncompressed layer digest values via the config's RootFS.DiffIDs array: + // they are already naturally computed for traditionally-pulled layers, and for partially-pulled layers we + // do the optimal partial pull, and then reconstruct the uncompressed tar stream just to (expensively) compute this digest. + // + // So, if the input image doesn't have RootFS.DiffIDs (i.e. is schema1), don't bother with partial pulls at all, + // and always use the traditional “view”. + // (Hypothetically, the user can opt out of the DiffID commitment by a c/storage option, giving up security for performance, + // but schema1 images don't support layer annotations anyway, so neither zstd:chunked nor estargz layers can + // benefit from partial pulls anyway — so this is not giving up on partial pull functionality; OTOH it may + // interfere with composefs conversion, which currently only happens on the partial pull code path.) + untrustedDiffID, err := s.untrustedLayerDiffID(options.LayerIndex) + if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + // PutBlobPartial is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return private.UploadedBlob{}, fmt.Errorf("internal error: in PutBlobPartial, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + } + var e untrustedLayerDiffIDUnknownError + if errors.As(err, &e) { + return private.UploadedBlob{}, private.NewErrFallbackToOrdinaryLayerDownload(err) + } + return private.UploadedBlob{}, err + } + fetcher := zstdFetcher{ chunkAccessor: chunkAccessor, ctx: ctx, @@ -382,7 +415,15 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces if err := func() error { // A scope for defer defer s.lock.Unlock() + // For true partial pulls, c/storage decides whether to compute the uncompressed digest based on an option in storage.conf. + // c/storage can also be configured, on this code path, to consume the full blob (primarily to allow composefs conversion), + // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + if out.UncompressedDigest != untrustedDiffID { + return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), + out.UncompressedDigest.String(), untrustedDiffID.String()) + } + s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest if out.TOCDigest != "" { s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest From 19d799686f0d226ef26511c0db51aa3da7a0a318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:02:58 +0100 Subject: [PATCH 13/14] Improve comments throughout commitLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove some completely redundant comments to shorten the code, clarify where appropriate. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 72adf62d4..be52158f2 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -877,16 +877,15 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) // must guarantee that, at any given time, at most one goroutine may execute // `commitLayer()`. func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) { - // Already committed? Return early. if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted { return false, nil } - // Start with an empty string or the previous layer ID. Note that - // `s.indexToStorageID` can only be accessed by *one* goroutine at any - // given time. Hence, we don't need to lock accesses. - var parentLayer string + var parentLayer string // "" if no parent if index != 0 { + // s.indexToStorageID can only be written by this function, and the our caller + // is responsible for ensuring it can be only be called by *one* goroutine at any + // given time. Hence, we don't need to lock accesses. prev, ok := s.indexToStorageID[index-1] if !ok { return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1) @@ -894,19 +893,17 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si parentLayer = prev } - // Carry over the previous ID for empty non-base layers. if info.emptyLayer { s.indexToStorageID[index] = parentLayer return false, nil } - // Check if there's already a layer with the ID that we'd give to the result of applying - // this layer blob to its parent, if it has one, or the blob's hex value otherwise. + // Collect trusted parameters of the layer. s.lock.Lock() trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) s.lock.Unlock() if !ok { - // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … + // Check if the layer exists already and the caller just (incorrectly) forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller // that relies on using a blob digest that has never been seen by the store had better call From 798709368a613849e0bb5ae48608d542ed5b1618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 21:17:52 +0100 Subject: [PATCH 14/14] WIP: Enforce DiffID match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note the FIXMEs Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 50 ++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index be52158f2..6c99ccae4 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -419,6 +419,8 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces // c/storage can also be configured, on this code path, to consume the full blob (primarily to allow composefs conversion), // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + // This is centrally enforced later, in commitLayer, but because we have the value available, + // we might just as well check immediately. if out.UncompressedDigest != untrustedDiffID { return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), out.UncompressedDigest.String(), untrustedDiffID.String()) @@ -935,6 +937,30 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si } } + // Ensure that we always see the same “view” of a layer, as identified by the layer’s uncompressed digest, + // unless the user has explicitly opted out of this in storage.conf: see the more detailed explanation in PutBlobPartial. + if trusted.diffID != "" { + untrustedDiffID, err := s.untrustedLayerDiffID(index) + if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + logrus.Debugf("Skipping commit for layer %q, manifest not yet available for DiffID check", index) + return true, nil + } + var e untrustedLayerDiffIDUnknownError + if !errors.As(err, &e) { + return false, err + } + // If untrustedLayerDiffIDUnknownError, the input image is schema1 and we should not have done a partial pull or a TOC-based match. + // FIXME: review tryReusingBlob… and PutBlobPartial to verify + if trusted.layerIdentifiedByTOC { + return false, fmt.Errorf("internal error: layer %d for blob %s was identified by TOC, but we don't have a DiffID in config", + index, trusted.logString()) + } + } else if trusted.diffID != untrustedDiffID { + return false, fmt.Errorf("layer %d (blob %s) does not match config's DiffID %q", index, trusted.logString(), untrustedDiffID) + } + } + id := layerID(parentLayer, trusted) if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { @@ -980,10 +1006,11 @@ func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayer diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() if ok { - // If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput. + // Typically, we compute a trusted DiffID value to authenticate the layer contents, see the detailed explanation + // in PutBlobPartial. If the user has opted out of that, but we know a trusted DiffID value + // (e.g. from a BlobInfoCache), set it in diffOutput. // That way it will be persisted in storage even if the cache is deleted; also - // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably - // the costly commit delay until a manifest is available). + // we can use the value below to avoid the untrustedUncompressedDigest logic. if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { diffOutput.UncompressedDigest = trusted.diffID } @@ -996,6 +1023,13 @@ func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayer logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) return nil, nil } + var e untrustedLayerDiffIDUnknownError + if errors.As(err, &e) { + // If untrustedLayerDiffIDUnknownError, the input image is schema1 and we should not have done a partial pull or a TOC-based match. + // FIXME: review tryReusingBlob… and PutBlobPartial to verify + return nil, fmt.Errorf("internal error: layer %d for blob %s was partially-pulled, but we don't have a DiffID in config", + index, trusted.logString()) + } return nil, err } @@ -1234,7 +1268,8 @@ func (e untrustedLayerDiffIDUnknownError) Error() string { // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. // It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. // -// WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. +// WARNING: This function does not even validate that the returned digest has a valid format. +// WARNING: We don’t _always_ validate this DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, // nothing is writing to s.manifest yet, and s.untrustedDiffIDValues might have been set @@ -1269,12 +1304,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D } res := s.untrustedDiffIDValues[layerIndex] if res == "" { - // In practice, this should, right now, only matter for pulls of OCI images - // (this code path implies that we did a partial pull because a layer has an annotation), - // So, DiffIDs should always be set. - // - // It is, though, reachable by pulling an OCI image while converting to schema1, - // using a manual (skopeo copy) or something similar, not (podman pull). + // schema1 images don’t have DiffID values in the config. // // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values. return "", untrustedLayerDiffIDUnknownError{