diff --git a/scanner/utils/hash.go b/scanner/utils/hash.go index 6adab554a..88c1dc229 100644 --- a/scanner/utils/hash.go +++ b/scanner/utils/hash.go @@ -23,9 +23,12 @@ import ( "io/fs" "os" "path/filepath" + "runtime" "sort" "strings" + "github.com/sourcegraph/conc/iter" + "github.com/openclarity/vmclarity/scanner/common" log "github.com/sirupsen/logrus" @@ -108,24 +111,43 @@ func dirFiles(dir string) ([]string, error) { // generateHash creates hashes for all files along with filenames and generates a hash for the hashes and filenames. func generateHash(files []string, open func(string) (io.ReadCloser, error)) (string, error) { - h := sha256.New() files = append([]string(nil), files...) sort.Strings(files) - for _, file := range files { - if strings.Contains(file, "\n") { - return "", errors.New("filenames with newlines are not supported") - } - r, err := open(file) - if err != nil { - return "", fmt.Errorf("failed to open file %s: %w", file, err) - } - hf := sha256.New() - _, err = io.Copy(hf, r) - r.Close() - if err != nil { - return "", fmt.Errorf("failed to create hash for file %s: %w", file, err) - } - fmt.Fprintf(h, "%x %s\n", hf.Sum(nil), file) + + mapper := iter.Mapper[string, string]{ + MaxGoroutines: runtime.GOMAXPROCS(0), + } + + results, err := mapper.MapErr(files, func(f *string) (string, error) { + // Return the hash of the file + return processFile(f, open) + }) + if err != nil { + return "", fmt.Errorf("failed to generate hash for files: %w", err) } + + h := sha256.New() + for _, result := range results { + fmt.Fprintf(h, "%s", result) + } + return fmt.Sprintf("%x", h.Sum(nil)), nil // nolint:perfsprint } + +func processFile(f *string, open func(string) (io.ReadCloser, error)) (string, error) { + if strings.Contains(*f, "\n") { + return "", errors.New("filenames with newlines are not supported") + } + r, err := open(*f) + if err != nil { + return "", fmt.Errorf("failed to open file %s: %w", *f, err) + } + hf := sha256.New() + _, err = io.Copy(hf, r) + r.Close() + if err != nil { + return "", fmt.Errorf("failed to create hash for file %s: %w", *f, err) + } + + return fmt.Sprintf("%x %s\n", hf.Sum(nil), *f), nil +} diff --git a/scanner/utils/hash_test.go b/scanner/utils/hash_test.go new file mode 100644 index 000000000..3c2eb6803 --- /dev/null +++ b/scanner/utils/hash_test.go @@ -0,0 +1,52 @@ +// Copyright © 2024 Cisco Systems, Inc. and its affiliates. +// All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "testing" + + "github.com/openclarity/vmclarity/scanner/common" +) + +func TestGenerateHash(t *testing.T) { + type args struct { + s string + } + tests := []struct { + name string + args args + want string + }{ + { + name: "non-empty dir", + args: args{ + s: "testdata", + }, + want: "93039ae6c8721d9acb744804c624edf91e5caf7912e0b709c81af0e3eb14bda6", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := GenerateHash(common.DIR, tt.args.s) + if err != nil { + t.Errorf("GenerateHash() error = %v", err) + } + if got != tt.want { + t.Errorf("GenerateHash() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/scanner/utils/testdata/test.txt b/scanner/utils/testdata/test.txt new file mode 100644 index 000000000..08e00ed29 --- /dev/null +++ b/scanner/utils/testdata/test.txt @@ -0,0 +1 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file