From 262dc6e8165bbaf7d29e56ad0d79a064ea239c3b Mon Sep 17 00:00:00 2001 From: Deepthi Date: Tue, 21 May 2024 08:28:34 +0530 Subject: [PATCH 1/3] #416 Fix - Files of the same name in sub-folders will be considered as different files --- checksumcalculator/checksumcalculator_test.go | 36 ++++++++++++++-- cmd/acceptance_test.go | 24 +++++++++++ detector/helpers/checksum_compare_test.go | 2 +- gitrepo/gitrepo.go | 42 ++++++++++--------- gitrepo/gitrepo_test.go | 36 +++++++++++++++- talismanrc/scopes.go | 2 +- 6 files changed, 116 insertions(+), 26 deletions(-) diff --git a/checksumcalculator/checksumcalculator_test.go b/checksumcalculator/checksumcalculator_test.go index 9acf8918..aba44a9f 100644 --- a/checksumcalculator/checksumcalculator_test.go +++ b/checksumcalculator/checksumcalculator_test.go @@ -34,18 +34,48 @@ func TestNewChecksumCalculator(t *testing.T) { t.Run("should return CollectiveChecksum when existing file name pattern is sent", func(t *testing.T) { gitAdditions := []gitrepo.Addition{ { - Path: "GitRepoPath1", + Path: "GitRepoPath1/GitRepoName1", Name: "GitRepoName1", }, } - expectedCC := "54bbf09e5c906e2d7cc0808729f8120cfa3c4bad3fb6a85689ae23ca00e5a3c8" - fileNamePattern := "*RepoName1" + expectedCC := "19250a996e1200d33e91454bc662efae7682410e5347cfc56b0ff386dfbc10ae" + fileNamePattern := "GitRepoPath1/" cc := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions) actualCC := cc.CalculateCollectiveChecksumForPattern(fileNamePattern) assert.Equal(t, expectedCC, actualCC) }) + + t.Run("should return the files own CollectiveChecksum when same file name is present in subfolders", func(t *testing.T) { + gitAdditions := []gitrepo.Addition{ + { + Path: "hello.txt", + Name: "hello.txt", + }, + { + Path: "subfolder/hello.txt", + Name: "hello.txt", + }, + } + hello_expectedCC := "9d30c2e4bcf181bba07374cc416f1892d89918038bce5172776475347c4d2d69" + fileNamePattern1 := "hello.txt" + cc1 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions) + actualCC := cc1.CalculateCollectiveChecksumForPattern(fileNamePattern1) + assert.Equal(t, hello_expectedCC, actualCC) + + subfolder_hello_expectedCC := "6c779c16bcc2e63c659be7649a531650210d6b96ae590a146f9ccdca383587f6" + fileNamePattern2 := "subfolder/hello.txt" + cc2 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions) + subfolder_actualCC := cc2.CalculateCollectiveChecksumForPattern(fileNamePattern2) + assert.Equal(t, subfolder_hello_expectedCC, subfolder_actualCC) + + all_txt_expectedCC := "aba77c9077539130e21a8f275fc5f1f43b7f0589e392bc89e4b96c578f0a9184" + fileNamePattern3 := "*.txt" + cc3 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions) + all_txt_actualCC := cc3.CalculateCollectiveChecksumForPattern(fileNamePattern3) + assert.Equal(t, all_txt_expectedCC, all_txt_actualCC) + }) } func TestDefaultChecksumCalculator_SuggestTalismanRC(t *testing.T) { diff --git a/cmd/acceptance_test.go b/cmd/acceptance_test.go index ea0b04cc..b13061ec 100644 --- a/cmd/acceptance_test.go +++ b/cmd/acceptance_test.go @@ -43,6 +43,13 @@ fileignoreconfig: ignore_detectors: [] ` +const talismanRCForHelloTxtFile = ` +fileignoreconfig: +- filename: hello.txt + checksum: edf37f1d33525d1710c3f5dc3437778c483def1d2e98b5a3495fc627eb1be588 + ignore_detectors: [] +` + func init() { git_testing.Logger = logrus.WithField("Environment", "Debug") git_testing.Logger.Debug("Acceptance test started") @@ -261,6 +268,23 @@ func TestPatternFindsSecretInNestedFile(t *testing.T) { }) } +func TestFilesWithSameNameWithinRepositoryAreHandledAsSeparateFiles(t *testing.T) { + withNewTmpGitRepo(func(git *git_testing.GitTesting) { + git.SetupBaselineFiles("simple-file", "some-dir/hello.txt") + git.CreateFileWithContents("hello.txt", awsAccessKeyIDExample) + git.AddAndcommit("*", "Start of Scan before talismanrc") + assert.Equal(t, 1, runTalismanInPrePushMode(git), "Expected run() to return 1 since secret is detected in hello") + git.CreateFileWithContents(".talismanrc", talismanRCForHelloTxtFile) + + git.AppendFileContent("some-dir/hello.txt", "More safe content") + assert.Equal(t, 0, runTalismanInPrePushMode(git), "Expected run() to return 0 since hello checksum is added to fileignoreconfig in talismanrc") + git.AddAndcommit("some-dir/hello.txt", "Start of second scan") + assert.Equal(t, 0, runTalismanInPrePushMode(git), "Expected run() to return 0 since hello in subfolder was only changed") + git.AppendFileContent("hello.txt", "More safe content in unsafe file") + git.AddAndcommit("hello.txt", "Start of third scan - new hash is required") + assert.Equal(t, 1, runTalismanInPrePushMode(git), "Expected run() to return 1 since hello checksum is changed") + }) +} func TestIgnoreHistoryDoesNotDetectRemovedSecrets(t *testing.T) { withNewTmpGitRepo(func(git *git_testing.GitTesting) { options.Debug = false diff --git a/detector/helpers/checksum_compare_test.go b/detector/helpers/checksum_compare_test.go index 879b0fa5..d8416fa2 100644 --- a/detector/helpers/checksum_compare_test.go +++ b/detector/helpers/checksum_compare_test.go @@ -49,7 +49,7 @@ func TestChecksumCompare_IsScanNotRequired(t *testing.T) { }, } cc := NewChecksumCompare(checksumCalculator, mockSHA256Hasher, &ignoreConfig) - addition := gitrepo.Addition{Name: "some.txt"} + addition := gitrepo.Addition{Name: "some.txt", Path: "some.txt"} mockSHA256Hasher.EXPECT().CollectiveSHA256Hash([]string{string(addition.Path)}).Return("somesha") checksumCalculator.EXPECT().CalculateCollectiveChecksumForPattern("some.txt").Return("sha1") diff --git a/gitrepo/gitrepo.go b/gitrepo/gitrepo.go index 50470576..c35b4e26 100644 --- a/gitrepo/gitrepo.go +++ b/gitrepo/gitrepo.go @@ -12,13 +12,13 @@ import ( log "github.com/sirupsen/logrus" ) -//FilePath represents the absolute path of an added file +// FilePath represents the absolute path of an added file type FilePath string -//FileName represents the base name of an added file +// FileName represents the base name of an added file type FileName string -//Addition represents the end state of a file +// Addition represents the end state of a file type Addition struct { Path FilePath Name FileName @@ -26,13 +26,13 @@ type Addition struct { Data []byte } -//GitRepo represents a Git repository located at the absolute path represented by root +// GitRepo represents a Git repository located at the absolute path represented by root type GitRepo struct { root string } -//RepoLocatedAt returns a new GitRepo with it's root located at the location specified by the argument. -//If the argument is not an absolute path, it will be turned into one. +// RepoLocatedAt returns a new GitRepo with it's root located at the location specified by the argument. +// If the argument is not an absolute path, it will be turned into one. func RepoLocatedAt(path string) GitRepo { absoluteRoot, _ := filepath.Abs(path) return GitRepo{absoluteRoot} @@ -117,7 +117,7 @@ func MatchGitDiffLine(gitDiffString string) (bool, string) { return false, "" } -//StagedAdditions returns the files staged for commit in a GitRepo +// StagedAdditions returns the files staged for commit in a GitRepo func (repo GitRepo) StagedAdditions() []Addition { files := repo.stagedFiles() result := make([]Addition, len(files)) @@ -132,7 +132,7 @@ func (repo GitRepo) StagedAdditions() []Addition { return result } -//AllAdditions returns all the outgoing additions and modifications in a GitRepo. This does not include files that were deleted. +// AllAdditions returns all the outgoing additions and modifications in a GitRepo. This does not include files that were deleted. func (repo GitRepo) AllAdditions() []Addition { result := string(repo.executeRepoCommand("git", "rev-parse", "--abbrev-ref", "origin/HEAD")) log.Debugf("Result of getting default branch %v", result) @@ -141,7 +141,7 @@ func (repo GitRepo) AllAdditions() []Addition { return repo.AdditionsWithinRange(oldCommit, newCommit) } -//AdditionsWithinRange returns the outgoing additions and modifications in a GitRepo that are in the given commit range. This does not include files that were deleted. +// AdditionsWithinRange returns the outgoing additions and modifications in a GitRepo that are in the given commit range. This does not include files that were deleted. func (repo GitRepo) AdditionsWithinRange(oldCommit string, newCommit string) []Addition { files := repo.outgoingNonDeletedFiles(oldCommit, newCommit) result := make([]Addition, len(files)) @@ -157,7 +157,7 @@ func (repo GitRepo) AdditionsWithinRange(oldCommit string, newCommit string) []A return result } -//NewAddition returns a new Addition for a file with supplied name and contents +// NewAddition returns a new Addition for a file with supplied name and contents func NewAddition(filePath string, content []byte) Addition { return Addition{ Path: FilePath(filePath), @@ -166,7 +166,7 @@ func NewAddition(filePath string, content []byte) Addition { } } -//NewScannerAddition returns an new Addition for a file with supplied contents and all of the commits the file is in +// NewScannerAddition returns an new Addition for a file with supplied contents and all of the commits the file is in func NewScannerAddition(filePath string, commits []string, content []byte) Addition { return Addition{ Path: FilePath(filePath), @@ -176,9 +176,9 @@ func NewScannerAddition(filePath string, commits []string, content []byte) Addit } } -//CheckIfFileExists checks if the file exists on the file system. Does not look into the file contents -//Returns TRUE if file exists -//Returns FALSE if the file is not found +// CheckIfFileExists checks if the file exists on the file system. Does not look into the file contents +// Returns TRUE if file exists +// Returns FALSE if the file is not found func (repo GitRepo) CheckIfFileExists(fileName string) bool { filepath := path.Join(repo.root, fileName) if _, err := os.Stat(filepath); os.IsNotExist(err) { @@ -187,18 +187,20 @@ func (repo GitRepo) CheckIfFileExists(fileName string) bool { return true } -//Matches states whether the addition matches the given pattern. -//If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched. -//If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism -//If there is no path separator anywhere in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that name anywhere in the repository. +// Matches states whether the addition matches the given pattern. +// If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched. +// If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism +// If there is no path separator anywhere in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that name anywhere in the repository. func (a Addition) Matches(pattern string) bool { var result bool if pattern[len(pattern)-1] == '/' { result = strings.HasPrefix(string(a.Path), pattern) } else if strings.ContainsRune(pattern, '/') { result, _ = path.Match(pattern, string(a.Path)) - } else { + } else if strings.ContainsRune(pattern, '*') { result, _ = path.Match(pattern, string(a.Name)) + } else { + result = strings.Compare(string(a.Path), pattern) == 0 } log.WithFields(log.Fields{ "pattern": pattern, @@ -208,7 +210,7 @@ func (a Addition) Matches(pattern string) bool { return result } -//TrackedFilesAsAdditions returns all of the tracked files in a GitRepo as Additions +// TrackedFilesAsAdditions returns all of the tracked files in a GitRepo as Additions func (repo GitRepo) TrackedFilesAsAdditions() []Addition { trackedFilePaths := repo.trackedFilePaths() var additions []Addition diff --git a/gitrepo/gitrepo_test.go b/gitrepo/gitrepo_test.go index 2191b248..19d5256d 100644 --- a/gitrepo/gitrepo_test.go +++ b/gitrepo/gitrepo_test.go @@ -215,11 +215,45 @@ func TestMatchShouldAllowWildcardPatternMatches(t *testing.T) { assert.False(t, file3.Matches(pattern)) } +func TestMatchShouldMatchExactFileIfNoPatternIsProvided(t *testing.T) { + file1 := Addition{Path: "bigfile", Name: "bigfile"} + file2 := Addition{Path: "subfolder/bigfile", Name: "bigfile"} + file3 := Addition{Path: "somefile", Name: "somefile"} + pattern := "bigfile" + + assert.True(t, file1.Matches(pattern)) + assert.False(t, file2.Matches(pattern)) + assert.False(t, file3.Matches(pattern)) +} + +func TestMatchShouldAllowStarPattern(t *testing.T) { + file1 := Addition{Path: "GitRepoPath1/File1.txt", Name: "File1.txt"} + file2 := Addition{Path: "GitRepoPath1/File2.txt", Name: "File2.txt"} + file3 := Addition{Path: "GitRepoPath1/somefile", Name: "somefile"} + file4 := Addition{Path: "somefile.jpg", Name: "somefile.jpg"} + file5 := Addition{Path: "somefile.txt", Name: "somefile.txt"} + + pattern := "GitRepoPath1/*.txt" + + assert.True(t, file1.Matches(pattern)) + assert.True(t, file2.Matches(pattern)) + assert.False(t, file3.Matches(pattern)) + assert.False(t, file4.Matches(pattern)) + assert.False(t, file5.Matches(pattern)) + + pattern1 := "*.txt" + assert.True(t, file1.Matches(pattern1)) + assert.True(t, file2.Matches(pattern1)) + assert.False(t, file3.Matches(pattern1)) + assert.False(t, file4.Matches(pattern1)) + assert.True(t, file5.Matches(pattern1)) +} + func setupOriginAndClones(originLocation, cloneLocation string) (*git_testing.GitTesting, GitRepo) { origin := RepoLocatedAt(originLocation) git := git_testing.Init(origin.root) git.SetupBaselineFiles("a.txt", filepath.Join("alice", "bob", "b.txt")) - git.SetupBaselineFiles("c.txt", filepath.Join( "folder b","c.txt")) + git.SetupBaselineFiles("c.txt", filepath.Join("folder b", "c.txt")) cwd, _ := os.Getwd() gitClone := git.GitClone(filepath.Join(cwd, cloneLocation)) return gitClone, RepoLocatedAt(cloneLocation) diff --git a/talismanrc/scopes.go b/talismanrc/scopes.go index 18da1258..e5db0adf 100644 --- a/talismanrc/scopes.go +++ b/talismanrc/scopes.go @@ -5,7 +5,7 @@ var knownScopes = map[string][]string{ "go": {"makefile", "go.mod", "go.sum", "Gopkg.toml", "Gopkg.lock", "glide.yaml", "glide.lock"}, "images": {"*.jpeg", "*.jpg", "*.png", "*.tiff", "*.bmp"}, "bazel": {"*.bzl"}, - "terraform": {".terraform.lock.hcl"}, + "terraform": {".terraform.lock.hcl", "*.terraform.lock.hcl"}, "php": {"composer.lock"}, "python": {"poetry.lock", "Pipfile.lock", "requirements.txt"}, } From 9785d246e0509dcf7b0e575dc66c3d9996c9c8c5 Mon Sep 17 00:00:00 2001 From: Deepthi Date: Mon, 3 Jun 2024 08:34:35 +0530 Subject: [PATCH 2/3] #416 Fix - Adding more testcases and covering all possible special characters --- gitrepo/gitrepo.go | 11 ++++++----- gitrepo/gitrepo_test.go | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/gitrepo/gitrepo.go b/gitrepo/gitrepo.go index c35b4e26..42edd3ed 100644 --- a/gitrepo/gitrepo.go +++ b/gitrepo/gitrepo.go @@ -190,16 +190,17 @@ func (repo GitRepo) CheckIfFileExists(fileName string) bool { // Matches states whether the addition matches the given pattern. // If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched. // If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism -// If there is no path separator anywhere in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that name anywhere in the repository. +// If there are other special characters in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that pattern anywhere in the repository. +// If there are no special characters in the pattern, then it means exact filename is provided as pattern like file.txt. Thus, the pattern is matched against the file path so that not all files with the same name in the repo are not returned. func (a Addition) Matches(pattern string) bool { var result bool - if pattern[len(pattern)-1] == '/' { + if pattern[len(pattern)-1] == '/' { // If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched. result = strings.HasPrefix(string(a.Path), pattern) - } else if strings.ContainsRune(pattern, '/') { + } else if strings.ContainsRune(pattern, '/') { // If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism result, _ = path.Match(pattern, string(a.Path)) - } else if strings.ContainsRune(pattern, '*') { + } else if strings.ContainsAny(pattern, "*?[]\\") { // If there are other special characters in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that pattern anywhere in the repository. result, _ = path.Match(pattern, string(a.Name)) - } else { + } else { // If there are no special characters in the pattern, then it means exact filename is provided as pattern like file.txt. Thus, the pattern is matched against the file path so that not all files with the same name in the repo are not returned. result = strings.Compare(string(a.Path), pattern) == 0 } log.WithFields(log.Fields{ diff --git a/gitrepo/gitrepo_test.go b/gitrepo/gitrepo_test.go index 19d5256d..deac2bae 100644 --- a/gitrepo/gitrepo_test.go +++ b/gitrepo/gitrepo_test.go @@ -232,6 +232,8 @@ func TestMatchShouldAllowStarPattern(t *testing.T) { file3 := Addition{Path: "GitRepoPath1/somefile", Name: "somefile"} file4 := Addition{Path: "somefile.jpg", Name: "somefile.jpg"} file5 := Addition{Path: "somefile.txt", Name: "somefile.txt"} + file6 := Addition{Path: "File1.txt", Name: "File1.txt"} + file7 := Addition{Path: "File3.txt", Name: "File3.txt"} pattern := "GitRepoPath1/*.txt" @@ -247,6 +249,26 @@ func TestMatchShouldAllowStarPattern(t *testing.T) { assert.False(t, file3.Matches(pattern1)) assert.False(t, file4.Matches(pattern1)) assert.True(t, file5.Matches(pattern1)) + + pattern2 := "File?.txt" + assert.True(t, file1.Matches(pattern2)) + assert.True(t, file2.Matches(pattern2)) + assert.True(t, file6.Matches(pattern2)) + + pattern3 := "File[1].txt" + assert.True(t, file1.Matches(pattern3)) + assert.False(t, file2.Matches(pattern3)) + assert.True(t, file6.Matches(pattern3)) + + pattern4 := "File[1-2].txt" + assert.True(t, file1.Matches(pattern4)) + assert.True(t, file2.Matches(pattern4)) + assert.True(t, file6.Matches(pattern4)) + assert.False(t, file7.Matches(pattern4)) + + pattern5 := "File\\1.txt" + assert.True(t, file1.Matches(pattern5)) + assert.True(t, file6.Matches(pattern5)) } func setupOriginAndClones(originLocation, cloneLocation string) (*git_testing.GitTesting, GitRepo) { From d7e8693a2a8cfb0cc13518f3115f223aa7f6fbe5 Mon Sep 17 00:00:00 2001 From: Deepthi Date: Fri, 7 Jun 2024 07:56:23 +0530 Subject: [PATCH 3/3] Upgrade to CodeQL Action v3 --- .github/workflows/codeql.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 012d44b8..622fbc2a 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -42,7 +42,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,6 +69,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}"