From 0147da5f6291cbe90b098df82979c1d92b99fb55 Mon Sep 17 00:00:00 2001
From: Naoki MATSUMOTO
Date: Fri, 19 Apr 2024 12:49:50 +0900
Subject: [PATCH] verify entire integrity

Give every FileEntry a digest and check it whenever the entry is used.

The digest is computed over the JSON form of the entry's name, size,
mode, uid, gid and realPath plus the digests of its children (sorted by
child name); for regular files (FILE_NEW / FILE_DIFF / FILE_SAME) the
file body is appended before hashing.

Digests are generated while packing (from a directory and from a layer),
checked by di3fs after a file's content has been loaded in
openFileInImage and for every non-file node in OnAdd, and checked at the
end of applyPatchImpl.

GenerateDigest and Verify propagate errors from digest generation
instead of returning a nil error.  Otherwise an entry whose Digest field
is empty and whose digest cannot be generated (e.g. a child without a
digest) compares "" == "" and passes verification, and the error checks
that follow GenerateDigest in pack.go can never fire.

Signed-off-by: Naoki MATSUMOTO
---
Note: this copy was rebuilt from a whitespace-collapsed version of the
patch, so indentation and blank context lines are a best-effort
reconstruction; use `git apply --ignore-whitespace` if a hunk is
rejected.  The index lines are kept from the original patch and do not
reflect the error-propagation change in pkg/image/fs.go.

 pkg/di3fs/di3fs.go  | 100 +++++++++++++++++++++++++-------------------
 pkg/image/fs.go     |  96 +++++++++++++++++++++++++++++++++++++-----
 pkg/image/pack.go   |  92 +++++++++++++++++++++++++++++++---------
 pkg/image/patch.go  |  18 ++++++++
 tests/bench_impl.sh |   2 +-
 5 files changed, 235 insertions(+), 73 deletions(-)

diff --git a/pkg/di3fs/di3fs.go b/pkg/di3fs/di3fs.go
index fc4243e..6d86c19 100644
--- a/pkg/di3fs/di3fs.go
+++ b/pkg/di3fs/di3fs.go
@@ -107,54 +107,62 @@ func (dn *Di3fsNode) readBaseFiles() ([]byte, error) {
 
 func (dn *Di3fsNode) openFileInImage() (fs.FileHandle, uint32, syscall.Errno) {
 	if len(dn.data) != 0 {
-	} else if dn.meta.IsNew() {
-		patchBytes := make([]byte, dn.meta.CompressedSize)
-		_, err := dn.root.diffImageFile.ReadAt(patchBytes, dn.meta.Offset)
-		if err != nil {
-			log.Errorf("failed to read from diffImage offset=%d err=%s", dn.meta.Offset, err)
-			return 0, 0, syscall.EIO
-		}
-		patchBuf := bytes.NewBuffer(patchBytes)
-		patchReader, err := zstd.NewReader(patchBuf)
-		if err != nil {
-			log.Errorf("failed to create zstd Reader err=%s", err)
-			return 0, 0, syscall.EIO
-		}
-		defer patchReader.Close()
-		dn.data, err = io.ReadAll(patchReader)
-		if err != nil {
-			log.Errorf("failed to read with zstd Reader err=%s", err)
-			return 0, 0, syscall.EIO
-		}
-	} else if dn.meta.IsSame() {
-		data, err := dn.readBaseFiles()
-		if err != nil {
-			log.Errorf("failed to read from base: %v", err)
-			return 0, 0, syscall.EIO
-		}
-		dn.data = data
 	} else {
-		var patchReader io.Reader
-		patchBytes := make([]byte, dn.meta.CompressedSize)
-		_, err := dn.root.diffImageFile.ReadAt(patchBytes, dn.meta.Offset)
-		if err != nil {
-			log.Errorf("failed to read from diffImage offset=%d len=%d err=%s", dn.meta.Offset, len(patchBytes), err)
-			return 0, 0, syscall.EIO
-		}
-		patchReader = bytes.NewBuffer(patchBytes)
-		baseData, err := dn.readBaseFiles()
-		if err != nil {
-			log.Errorf("failed to read from base: %v", err)
-			return 0, 0, syscall.EIO
+		if dn.meta.IsNew() {
+			patchBytes := make([]byte, dn.meta.CompressedSize)
+			_, err := dn.root.diffImageFile.ReadAt(patchBytes, dn.meta.Offset)
+			if err != nil {
+				log.Errorf("failed to read from diffImage offset=%d err=%s", dn.meta.Offset, err)
+				return 0, 0, syscall.EIO
+			}
+			patchBuf := bytes.NewBuffer(patchBytes)
+			patchReader, err := zstd.NewReader(patchBuf)
+			if err != nil {
+				log.Errorf("failed to create zstd Reader err=%s", err)
+				return 0, 0, syscall.EIO
+			}
+			defer patchReader.Close()
+			dn.data, err = io.ReadAll(patchReader)
+			if err != nil {
+				log.Errorf("failed to read with zstd Reader err=%s", err)
+				return 0, 0, syscall.EIO
+			}
+		} else if dn.meta.IsSame() {
+			data, err := dn.readBaseFiles()
+			if err != nil {
+				log.Errorf("failed to read from base: %v", err)
+				return 0, 0, syscall.EIO
+			}
+			dn.data = data
+		} else {
+			var patchReader io.Reader
+			patchBytes := make([]byte, dn.meta.CompressedSize)
+			_, err := dn.root.diffImageFile.ReadAt(patchBytes, dn.meta.Offset)
+			if err != nil {
+				log.Errorf("failed to read from diffImage offset=%d len=%d err=%s", dn.meta.Offset, len(patchBytes), err)
+				return 0, 0, syscall.EIO
+			}
+			patchReader = bytes.NewBuffer(patchBytes)
+			baseData, err := dn.readBaseFiles()
+			if err != nil {
+				log.Errorf("failed to read from base: %v", err)
+				return 0, 0, syscall.EIO
+			}
+
+			newBytes, err := dn.plugin.Patch(baseData, patchReader)
+			if err != nil {
+				log.Errorf("Open failed(bsdiff) err=%v", err)
+				return 0, 0, syscall.EIO
+			}
+			dn.data = newBytes
+			log.Debugf("Successfully patched %s", dn.meta.Name)
 		}
 
-		newBytes, err := dn.plugin.Patch(baseData, patchReader)
+		err := dn.meta.Verify(dn.data)
 		if err != nil {
-			log.Errorf("Open failed(bsdiff) err=%v", err)
+			log.Errorf("failed to verify %s(%d): %v", dn.path, dn.meta.Type, err)
 			return 0, 0, syscall.EIO
 		}
-		dn.data = newBytes
-		log.Debugf("Successfully patched %s", dn.meta.Name)
 	}
 	return nil, fuse.FOPEN_KEEP_CACHE | fuse.FOPEN_CACHE_DIR, 0
 }
@@ -205,6 +213,14 @@ func (dr *Di3fsNode) OnAdd(ctx context.Context) {
 	if dr.root.IsBase() && dr.meta.IsBaseRequired() {
 		log.Fatalf("invalid base image")
 	}
+
+	if !dr.meta.IsFile() {
+		err := dr.meta.Verify(nil)
+		if err != nil {
+			log.Fatalf("failed to verify %s: %v", dr.path, err)
+		}
+	}
+
 	// here, rootNode is initialized
 	//log.Debugf("base=%s patch=%s", dr.basePath, dr.patchPath)
 	for childfName := range dr.meta.Childs {
diff --git a/pkg/image/fs.go b/pkg/image/fs.go
index e67d0de..dfe0ed8 100644
--- a/pkg/image/fs.go
+++ b/pkg/image/fs.go
@@ -6,10 +6,12 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"slices"
 	"strings"
 	"syscall"
 
 	"github.com/klauspost/compress/zstd"
+	"github.com/opencontainers/go-digest"
 )
 
 type EntryType int
@@ -68,16 +70,18 @@ func UnmarshalJsonFromCompressed[T any](b []byte) (*T, error) {
 }
 
 type FileEntry struct {
-	Name           string                `json:"name"`
-	Size           int                   `json:"size"`
-	Mode           uint32                `json:"mode"`
-	UID            uint32                `json:"uid"`
-	GID            uint32                `json:"gid"`
-	Type           EntryType             `json:"type"`
-	RealPath       string                `json:"realPath,omitempty"`
-	Childs         map[string]*FileEntry `json:"childs"`
-	CompressedSize int64                 `json:"compressedSize,omitempty"`
-	Offset         int64                 `json:"offset,omitempty"`
+	Name     string                `json:"name"`
+	Size     int                   `json:"size"`
+	Mode     uint32                `json:"mode"`
+	UID      uint32                `json:"uid"`
+	GID      uint32                `json:"gid"`
+	RealPath string                `json:"realPath,omitempty"`
+	Childs   map[string]*FileEntry `json:"childs"`
+
+	Type           EntryType     `json:"type"`
+	CompressedSize int64         `json:"compressedSize,omitempty"`
+	Offset         int64         `json:"offset,omitempty"`
+	Digest         digest.Digest `json:"digest"`
 }
 
 func (fe *FileEntry) DeepCopy() *FileEntry {
@@ -139,6 +143,11 @@ func (fe FileEntry) IsSame() bool {
 func (fe FileEntry) IsLink() bool {
 	return fe.Type == FILE_ENTRY_SYMLINK
 }
+func (fe FileEntry) IsFile() bool {
+	return fe.Type == FILE_ENTRY_FILE_DIFF ||
+		fe.Type == FILE_ENTRY_FILE_NEW ||
+		fe.Type == FILE_ENTRY_FILE_SAME
+}
 
 func (fe FileEntry) IsBaseRequired() bool {
 	return fe.Type == FILE_ENTRY_FILE_DIFF ||
@@ -185,3 +194,70 @@ func (fe *FileEntry) lookupImpl(paths []string) (*FileEntry, error) {
 	}
 	return child.lookupImpl(paths[1:])
 }
+
+type feForDigest struct {
+	Name     string          `json:"name"`
+	Size     int             `json:"size"`
+	Mode     uint32          `json:"mode"`
+	UID      uint32          `json:"uid"`
+	GID      uint32          `json:"gid"`
+	RealPath string          `json:"realPath,omitempty"`
+	Childs   []digest.Digest `json:"childs"`
+}
+
+func (fe *FileEntry) feForDigest() (*feForDigest, error) {
+	res := &feForDigest{
+		Name:     fe.Name,
+		Size:     fe.Size,
+		Mode:     fe.Mode,
+		UID:      fe.UID,
+		GID:      fe.GID,
+		RealPath: fe.RealPath,
+		Childs:   []digest.Digest{},
+	}
+
+	childNames := []string{}
+	for name := range fe.Childs {
+		childNames = append(childNames, name)
+	}
+	slices.Sort(childNames)
+
+	for _, name := range childNames {
+		c := fe.Childs[name]
+		if c.Digest == "" {
+			return nil, fmt.Errorf("child %s does not have digest", name)
+		}
+		res.Childs = append(res.Childs, c.Digest)
+	}
+
+	return res, nil
+}
+
+func (fe *FileEntry) GenerateDigest(body []byte) (digest.Digest, error) {
+	fed, err := fe.feForDigest()
+	if err != nil {
+		return "", err
+	}
+	feBytes, err := json.Marshal(fed)
+	if err != nil {
+		return "", err
+	}
+
+	if fe.IsFile() {
+		feBytes = append(feBytes, body...)
+	}
+	return digest.FromBytes(feBytes), nil
+}
+
+func (fe *FileEntry) Verify(body []byte) error {
+	d, err := fe.GenerateDigest(body)
+	if err != nil {
+		return err
+	}
+
+	if d != fe.Digest {
+		return fmt.Errorf("failed to verify digest")
+	}
+
+	return nil
+}
diff --git a/pkg/image/pack.go b/pkg/image/pack.go
index f9fc0c5..d4f031f 100644
--- a/pkg/image/pack.go
+++ b/pkg/image/pack.go
@@ -90,7 +90,6 @@ func packDirImplMultithread(dirPath string, layer v1.Layer, outDirEntry *FileEnt
 					break
 				}
 				ct.entry.CompressedSize = int64(outBuffer.Len())
-				ct.entry.Type = FILE_ENTRY_FILE_NEW
 				ct.data = outBuffer
 				writeTasks <- ct
 			}
@@ -151,29 +150,37 @@ func enqueuePackTaskToChannel(dirPath string, parentEntry *FileEntry, taskChan c
 			}
 			entry.Type = FILE_ENTRY_SYMLINK
 			entry.RealPath = realPath
+			entry.Digest, err = entry.GenerateDigest(nil)
+			if err != nil {
+				return err
+			}
 			parentEntry.Childs[fName] = entry
-			continue
-		}
-
-		err = entry.SetUGID(dirFilePath)
-		if err != nil {
-			return err
-		}
+		} else {
+			entry.Type = FILE_ENTRY_FILE_NEW
+			err = entry.SetUGID(dirFilePath)
+			if err != nil {
+				return err
+			}
 
-		entry.Size, err = getFileSize(dirFilePath)
-		if err != nil {
-			return err
-		}
+			entry.Size, err = getFileSize(dirFilePath)
+			if err != nil {
+				return err
+			}
 
-		fileBody, err := readFileAll(dirFilePath)
-		if err != nil {
-			return fmt.Errorf("failed to read file %s: %v", dirFilePath, err)
-		}
+			fileBody, err := readFileAll(dirFilePath)
+			if err != nil {
+				return fmt.Errorf("failed to read file %s: %v", dirFilePath, err)
+			}
+			entry.Digest, err = entry.GenerateDigest(fileBody)
+			if err != nil {
+				return err
+			}
 
-		parentEntry.Childs[fName] = entry
-		taskChan <- packTask{
-			entry: entry,
-			data:  bytes.NewBuffer(fileBody),
+			parentEntry.Childs[fName] = entry
+			taskChan <- packTask{
+				entry: entry,
+				data:  bytes.NewBuffer(fileBody),
+			}
 		}
 	}
 
@@ -201,6 +208,10 @@ func enqueuePackTaskToChannel(dirPath string, parentEntry *FileEntry, taskChan c
 	}
 
 	parentEntry.Type = FILE_ENTRY_DIR_NEW
+	parentEntry.Digest, err = parentEntry.GenerateDigest(nil)
+	if err != nil {
+		return err
+	}
 	return nil
 }
 
@@ -254,21 +265,38 @@ func enqueuePackTaskToChannelFromLayer(layer v1.Layer, rootEntry *FileEntry, tas
 				if err != nil {
 					return fmt.Errorf("failed to copy %s: %v", header.Name, err)
 				}
+				entry.Digest, err = entry.GenerateDigest(data.Bytes())
+				if err != nil {
+					return err
+				}
 				taskChan <- packTask{
 					entry: entry,
 					data:  &data,
 				}
+			} else {
+				entry.Digest, err = entry.GenerateDigest(nil)
+				if err != nil {
+					return err
+				}
 			}
 			dirEntry.Childs[basename] = entry
 			files[header.Name] = entry
 		case tar.TypeSymlink:
 			entry.Type = FILE_ENTRY_SYMLINK
 			entry.RealPath = header.Linkname
+			entry.Digest, err = entry.GenerateDigest(nil)
+			if err != nil {
+				return err
+			}
 			dirEntry.Childs[basename] = entry
 			files[header.Name] = entry
 		case tar.TypeLink:
 			entry.Type = FILE_ENTRY_HARDLINK
 			entry.RealPath = header.Linkname
+			entry.Digest, err = entry.GenerateDigest(nil)
+			if err != nil {
+				return err
+			}
 			dirEntry.Childs[basename] = entry
 			files[header.Name] = entry
 		case tar.TypeBlock, tar.TypeChar, tar.TypeFifo:
@@ -277,6 +305,30 @@ func enqueuePackTaskToChannelFromLayer(layer v1.Layer, rootEntry *FileEntry, tas
 			return fmt.Errorf("file %s has unexpected type flag: %d", header.Name, header.Typeflag)
 		}
 	}
+
+	err = generateDigestDir(rootEntry)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func generateDigestDir(parentFe *FileEntry) error {
+	if !parentFe.IsDir() {
+		return nil
+	}
+	for name := range parentFe.Childs {
+		c := parentFe.Childs[name]
+		err := generateDigestDir(c)
+		if err != nil {
+			return err
+		}
+	}
+	d, err := parentFe.GenerateDigest(nil)
+	if err != nil {
+		return err
+	}
+	parentFe.Digest = d
 	return nil
 }
 
diff --git a/pkg/image/patch.go b/pkg/image/patch.go
index 4ed2d33..7246081 100644
--- a/pkg/image/patch.go
+++ b/pkg/image/patch.go
@@ -184,5 +184,23 @@ func applyPatchImpl(basePath, newPath string, dirEntry *FileEntry, img *DimgFile
 		return nil, fmt.Errorf("unexpected error type=%v", dirEntry.Type)
 	}
 
+	data := []byte{}
+	if dirEntry.IsFile() {
+		f, err := os.Open(newFilePath)
+		if err != nil {
+			return nil, err
+		}
+		defer f.Close()
+
+		data, err = io.ReadAll(f)
+		if err != nil {
+			return nil, err
+		}
+	}
+	err := dirEntry.Verify(data)
+	if err != nil {
+		return nil, fmt.Errorf("failed to verify %s(%d, %d): %v", newFilePath, dirEntry.Type, dirEntry.Size, err)
+	}
+
 	return hardlinks, nil
 }
diff --git a/tests/bench_impl.sh b/tests/bench_impl.sh
index bd17d8b..db4e519 100755
--- a/tests/bench_impl.sh
+++ b/tests/bench_impl.sh
@@ -87,7 +87,7 @@ for ((i=0; i < $(expr ${#IMAGE_VERSIONS[@]} - 1); i++));do
 	# invalidate file and page cache
 	# some environments (e.g. GHA) does not allow to modify this value
 	set +u
-	if [ $RUNNER != "GHA" ]; then
+	if [ "$RUNNER" != "GHA" ]; then
 		echo 3 | sudo tee /proc/sys/vm/drop_caches
 	fi
 	set -u