Skip to content

Commit

Permalink
Expose unindexed directory resolver option
Browse files Browse the repository at this point in the history
- Implements some new functions on unindexed directory which previously had 'panic' for their impl
- Fixed RelativeFileByPath for unindexed directory resolver

Signed-off-by: adammcclenaghan <[email protected]>
  • Loading branch information
adammcclenaghan committed Oct 21, 2024
1 parent 5c0df63 commit 019df1c
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 59 deletions.
3 changes: 2 additions & 1 deletion cmd/syft/internal/commands/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ func getSource(ctx context.Context, opts *options.Catalog, userInput string, sou
}).
WithBasePath(opts.Source.BasePath).
WithSources(sources...).
WithDefaultImagePullSource(opts.Source.Image.DefaultPullSource)
WithDefaultImagePullSource(opts.Source.Image.DefaultPullSource).
WithUnindexed(opts.Unindexed)

var err error
var platform *image.Platform
Expand Down
4 changes: 4 additions & 0 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ type Catalog struct {
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Source sourceConfig `yaml:"source" json:"source" mapstructure:"source"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Unindexed bool `yaml:"unindexed" json:"unindexed" mapstructure:"unindexed"`

// configuration for inclusion of unknown information within elements
Unknowns unknownsConfig `yaml:"unknowns" mapstructure:"unknowns"`
Expand Down Expand Up @@ -228,6 +229,9 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) {

flags.StringVarP(&cfg.Source.BasePath, "base-path", "",
"base directory for scanning, no links will be followed above this directory, and all paths will be reported relative to this directory")

flags.BoolVarP(&cfg.Unindexed, "unindexed", "",
"whether to index the file system or not, indexing can improve scan times but incurs a memory overhead, default false.")
}

func (cfg *Catalog) DescribeFields(descriptions fangs.FieldDescriptionSet) {
Expand Down
5 changes: 5 additions & 0 deletions syft/get_source_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ func (c *GetSourceConfig) WithAlias(alias source.Alias) *GetSourceConfig {
return c
}

func (c *GetSourceConfig) WithUnindexed(unindexed bool) *GetSourceConfig {
c.SourceProviderConfig = c.SourceProviderConfig.WithUnindexed(unindexed)
return c
}

func (c *GetSourceConfig) WithRegistryOptions(registryOptions *image.RegistryOptions) *GetSourceConfig {
c.SourceProviderConfig = c.SourceProviderConfig.WithRegistryOptions(registryOptions)
return c
Expand Down
126 changes: 92 additions & 34 deletions syft/internal/fileresolver/unindexed_directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,32 @@ import (
"github.com/mitchellh/go-homedir"
"github.com/spf13/afero"

stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
syftFile "github.com/anchore/syft/syft/file"
)

var _ file.Resolver = (*UnindexedDirectory)(nil)
var _ file.WritableResolver = (*UnindexedDirectory)(nil)
var _ syftFile.Resolver = (*UnindexedDirectory)(nil)
var _ syftFile.WritableResolver = (*UnindexedDirectory)(nil)

type UnindexedDirectory struct {
ls afero.Lstater
lr afero.LinkReader
base string
dir string
fs afero.Fs
ls afero.Lstater
lr afero.LinkReader
base string
dir string
fs afero.Fs
pathFilters []PathIndexVisitor
}

func NewFromUnindexedDirectory(dir string) file.WritableResolver {
func NewFromUnindexedDirectory(dir string) syftFile.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, "")
}

func NewFromRootedUnindexedDirectory(dir string, base string) file.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, base)
func NewFromRootedUnindexedDirectory(dir string, base string, pathVisitors ...PathIndexVisitor) syftFile.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, base, pathVisitors...)
}

func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string) file.WritableResolver {
func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string, pathVisitors ...PathIndexVisitor) syftFile.WritableResolver {
ls, ok := fs.(afero.Lstater)
if !ok {
panic(fmt.Sprintf("unable to get afero.Lstater interface from: %+v", fs))
Expand Down Expand Up @@ -70,15 +72,16 @@ func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string) file.Writ
}
}
return UnindexedDirectory{
base: base,
dir: dir,
fs: fs,
ls: ls,
lr: lr,
base: base,
dir: dir,
fs: fs,
ls: ls,
lr: lr,
pathFilters: pathVisitors,
}
}

func (u UnindexedDirectory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
func (u UnindexedDirectory) FileContentsByLocation(location syftFile.Location) (io.ReadCloser, error) {
p := u.absPath(u.scrubInputPath(location.RealPath))
f, err := u.fs.Open(p)
if err != nil {
Expand Down Expand Up @@ -149,11 +152,11 @@ func (u UnindexedDirectory) absPath(p string) string {

// - full symlink resolution should be performed on all requests
// - only returns locations to files (NOT directories)
func (u UnindexedDirectory) FilesByPath(paths ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) FilesByPath(paths ...string) (out []syftFile.Location, _ error) {
return u.filesByPath(true, false, paths...)
}

func (u UnindexedDirectory) filesByPath(resolveLinks bool, includeDirs bool, paths ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) filesByPath(resolveLinks bool, includeDirs bool, paths ...string) (out []syftFile.Location, _ error) {
// sort here for stable output
sort.Strings(paths)
nextPath:
Expand Down Expand Up @@ -183,11 +186,11 @@ nextPath:
// - full symlink resolution should be performed on all requests
// - if multiple paths to the same file are found, the best single match should be returned
// - only returns locations to files (NOT directories)
func (u UnindexedDirectory) FilesByGlob(patterns ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) FilesByGlob(patterns ...string) (out []syftFile.Location, _ error) {
return u.filesByGlob(true, false, patterns...)
}

func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, patterns ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, patterns ...string) (out []syftFile.Location, _ error) {
f := unindexedDirectoryResolverFS{
u: u,
}
Expand All @@ -206,14 +209,62 @@ func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, pat
return u.filesByPath(resolveLinks, includeDirs, paths...)
}

func (u UnindexedDirectory) FilesByMIMEType(_ ...string) ([]file.Location, error) {
panic("FilesByMIMEType unsupported")
// FilesByMIMEType fetches all of the files which match the provided MIME types.
// This requires walking the filetree from u.base and checking the MIME type of each file we encounter
// Handling of errors while walking is ignored unless a filterFn wants the directory to be skipped.
// TODO: afero.Walk will read all files in a single directory into memory, providing the same lexical ordering
// guarantees that golang's filepath.Walk implementation provides. However, when a single directory contains
// many files, this could cause Syft to run OOM. We could consider using a custom walk function in future
// which uses Readdir with a hardcoded value N >= 1 so that entire directories aren't read into memory each time.
func (u UnindexedDirectory) FilesByMIMEType(types ...string) ([]syftFile.Location, error) {
uniqueLocations := make([]syftFile.Location, 0)
err := afero.Walk(u.fs, u.absPath(u.base), func(p string, fi fs.FileInfo, walkErr error) error {
// Ignore any path for which a filter function returns true
for _, filterFn := range u.pathFilters {
if filterFn == nil {
continue
}

if filterErr := filterFn(u.base, p, fi, walkErr); filterErr != nil {
if errors.Is(filterErr, fs.SkipDir) {
// signal to walk() to skip this directory entirely (even if we're processing a file)
return filterErr
}
// skip this path but don't affect walk() trajectory
return nil
}
}

// If we get here, then the MIME type of the file should be checked
mimeType := stereoscopeFile.NewMetadataFromPath(p, fi).MIMEType
for _, mType := range types {
if mimeType == mType {
// Tidy the path, same as AllLocations
p = strings.TrimPrefix(p, u.dir)
if p == "" {
return nil
}
p = strings.TrimPrefix(p, "/")
uniqueLocations = append(uniqueLocations, syftFile.NewLocation(p))
}
}

// Continue to walk()
return nil
})

if err != nil {
log.Debug(err)
return nil, err
}

return uniqueLocations, nil
}

// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
func (u UnindexedDirectory) RelativeFileByPath(l file.Location, p string) *file.Location {
p = path.Clean(path.Join(l.RealPath, p))
func (u UnindexedDirectory) RelativeFileByPath(l syftFile.Location, p string) *syftFile.Location {
p = path.Clean(p)
locs, err := u.filesByPath(true, false, p)
if err != nil || len(locs) == 0 {
return nil
Expand All @@ -228,8 +279,8 @@ func (u UnindexedDirectory) RelativeFileByPath(l file.Location, p string) *file.

// - NO symlink resolution should be performed on results
// - returns locations for any file or directory
func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Location {
out := make(chan file.Location)
func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan syftFile.Location {
out := make(chan syftFile.Location)
errWalkCanceled := fmt.Errorf("walk canceled")
go func() {
defer close(out)
Expand All @@ -240,7 +291,7 @@ func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Locati
}
p = strings.TrimPrefix(p, "/")
select {
case out <- file.NewLocation(p):
case out <- syftFile.NewLocation(p):
return nil
case <-ctx.Done():
return errWalkCanceled
Expand All @@ -253,11 +304,18 @@ func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Locati
return out
}

func (u UnindexedDirectory) FileMetadataByLocation(_ file.Location) (file.Metadata, error) {
panic("FileMetadataByLocation unsupported")
func (u UnindexedDirectory) FileMetadataByLocation(loc syftFile.Location) (syftFile.Metadata, error) {
p := u.absPath(u.scrubInputPath(loc.RealPath))
finfo, err := u.fs.Stat(p)
if err != nil {
return syftFile.Metadata{}, err
}

metadata := stereoscopeFile.NewMetadataFromPath(p, finfo)
return metadata, nil
}

func (u UnindexedDirectory) Write(location file.Location, reader io.Reader) error {
func (u UnindexedDirectory) Write(location syftFile.Location, reader io.Reader) error {
filePath := location.RealPath
if path.IsAbs(filePath) {
filePath = filePath[1:]
Expand All @@ -266,7 +324,7 @@ func (u UnindexedDirectory) Write(location file.Location, reader io.Reader) erro
return afero.WriteReader(u.fs, absPath, reader)
}

func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *file.Location {
func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *syftFile.Location {
filePath = path.Clean(filePath)

virtualPath := filePath
Expand All @@ -287,7 +345,7 @@ func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *fil
}
}

l := file.NewVirtualLocation(realPath, virtualPath)
l := syftFile.NewVirtualLocation(realPath, virtualPath)
return &l
}

Expand Down
49 changes: 49 additions & 0 deletions syft/internal/fileresolver/unindexed_directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,55 @@ func Test_WritableUnindexedDirectoryResolver(t *testing.T) {
require.Equal(t, c, string(bytes))
}

func Test_UnindexedDirectoryResolver_FilesByMIMEType(t *testing.T) {
tests := []struct {
fixturePath string
mimeType string
expectedPaths *strset.Set
}{
{
fixturePath: "./test-fixtures/image-simple",
mimeType: "text/plain",
expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"),
},
}
for _, test := range tests {
t.Run(test.fixturePath, func(t *testing.T) {
resolver := NewFromUnindexedDirectory(test.fixturePath)
locations, err := resolver.FilesByMIMEType(test.mimeType)
assert.NoError(t, err)
assert.Equal(t, test.expectedPaths.Size(), len(locations))
for _, l := range locations {
assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
}
})
}
}

func Test_UnindexedDirectoryResolver_RelativeFileByPath(t *testing.T) {
cases := []struct {
name string
root string
searchFile string
expected *strset.Set
}{
{
name: "should find nested file from root",
root: "./test-fixtures/image-simple",
searchFile: "target/really/nested/file-3.txt",
expected: strset.New("target/really/nested/file-3.txt"),
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
resolver := NewFromUnindexedDirectory(c.root)
rootLoc := file.NewLocation(c.root)
loc := resolver.RelativeFileByPath(rootLoc, c.searchFile)
assert.True(t, c.expected.Has(loc.Coordinates.RealPath), "does not have path %q", loc.RealPath)
})
}
}

func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) {
done := make(chan bool)
go func() {
Expand Down
24 changes: 14 additions & 10 deletions syft/source/directorysource/directory_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,17 @@ import (
var _ source.Source = (*directorySource)(nil)

type Config struct {
Path string
Base string
Exclude source.ExcludeConfig
Alias source.Alias
Path string
Base string
Exclude source.ExcludeConfig
Alias source.Alias
Unindexed bool
}

type directorySource struct {
id artifact.ID
config Config
resolver *fileresolver.Directory
resolver file.Resolver
mutex *sync.Mutex
}

Expand Down Expand Up @@ -145,12 +146,15 @@ func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
// this should be the only file resolver that might have overlap with where files are cached
exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)

res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
if s.config.Unindexed {
s.resolver = fileresolver.NewFromRootedUnindexedDirectory(s.config.Path, s.config.Base)
} else {
res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
}
s.resolver = res
}

s.resolver = res
}

return s.resolver, nil
Expand Down
Loading

0 comments on commit 019df1c

Please sign in to comment.