From 8b5ab4d5dfd33141f2e58435a579fc8362f2858e Mon Sep 17 00:00:00 2001 From: Catarina Paralta <46568597+paralta@users.noreply.github.com> Date: Thu, 25 Jul 2024 11:44:39 +0100 Subject: [PATCH] feat: generate hash concurrently --- scanner/utils/hash.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/scanner/utils/hash.go b/scanner/utils/hash.go index 6adab554a3..c244522910 100644 --- a/scanner/utils/hash.go +++ b/scanner/utils/hash.go @@ -17,7 +17,6 @@ package utils import ( "crypto/sha256" - "errors" "fmt" "io" "io/fs" @@ -27,6 +26,7 @@ import ( "strings" "github.com/openclarity/vmclarity/scanner/common" + "github.com/sourcegraph/conc/iter" log "github.com/sirupsen/logrus" ) @@ -108,24 +108,34 @@ func dirFiles(dir string) ([]string, error) { // generateHash creates hashes for all files along with filenames and generates a hash for the hashes and filenames. func generateHash(files []string, open func(string) (io.ReadCloser, error)) (string, error) { - h := sha256.New() files = append([]string(nil), files...) sort.Strings(files) - for _, file := range files { - if strings.Contains(file, "\n") { - return "", errors.New("filenames with newlines are not supported") + + mapper := iter.Mapper[string, string]{ + MaxGoroutines: len(files) / 2, + } + + results := mapper.Map(files, func(f *string) string { + if strings.Contains(*f, "\n") { + return "" } - r, err := open(file) + r, err := open(*f) if err != nil { - return "", fmt.Errorf("failed to open file %s: %w", file, err) + return "" } hf := sha256.New() _, err = io.Copy(hf, r) r.Close() if err != nil { - return "", fmt.Errorf("failed to create hash for file %s: %w", file, err) + return "" } - fmt.Fprintf(h, "%x %s\n", hf.Sum(nil), file) + return fmt.Sprintf("%x %s\n", hf.Sum(nil), *f) + }) + + h := sha256.New() + for _, result := range results { + fmt.Fprintf(h, "%s", result) } + return fmt.Sprintf("%x", h.Sum(nil)), nil // nolint:perfsprint }