From 22718bcf5fdd1559cd8db5818744a958e451995f Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Wed, 10 Jul 2024 21:30:22 +0100 Subject: [PATCH] fixup! feat: only emit changed files with git walker Signed-off-by: Brian McGee --- cli/cli.go | 3 +-- cli/format.go | 28 ++++++++++++++-------------- walker/git.go | 40 ++++++++++++++++++++++++---------------- walker/walker.go | 10 +++++----- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index ecebb72c..655e670a 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -26,7 +26,6 @@ type Format struct { TreeRoot string `type:"existingdir" xor:"tree-root" help:"The root directory from which treefmt will start walking the filesystem (defaults to the directory containing the config file)."` TreeRootFile string `type:"string" xor:"tree-root" help:"File to search for to find the project root (if --tree-root is not passed)."` Walk walker.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."` - GitAllFiles bool `default:"false" help:"Forces the git walker to ignore change tracking and emit all files for processing instead of only those that have changed."` Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."` Version bool `name:"version" short:"V" help:"Print version."` @@ -42,7 +41,7 @@ type Format struct { formatters map[string]*format.Formatter globalExcludes []glob.Glob - filesCh chan *walker.File + fileCh chan *walker.File formattedCh chan *walker.File processedCh chan *walker.File } diff --git a/cli/format.go b/cli/format.go index ab3ad378..34cbfdb4 100644 --- a/cli/format.go +++ b/cli/format.go @@ -147,13 +147,13 @@ func (f *Format) Run() (err error) { // create a channel for files needing to be processed // we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine - f.filesCh = make(chan *walker.File, BatchSize*runtime.NumCPU()) + f.fileCh = make(chan *walker.File, BatchSize*runtime.NumCPU()) // create a channel for files that have been formatted - f.formattedCh = make(chan *walker.File, cap(f.filesCh)) + f.formattedCh = make(chan *walker.File, cap(f.fileCh)) // create a channel for files that have been processed - f.processedCh = make(chan *walker.File, cap(f.filesCh)) + f.processedCh = make(chan *walker.File, cap(f.fileCh)) // start concurrent processing tasks in reverse order eg.Go(f.updateCache(ctx)) @@ -168,7 +168,7 @@ func (f *Format) Run() (err error) { func (f *Format) walkFilesystem(ctx context.Context) func() error { return func() error { eg, ctx := errgroup.WithContext(ctx) - pathsCh := make(chan string, BatchSize) + pathCh := make(chan string, BatchSize) // By default, we use the cli arg, but if the stdin flag has been set we force a filesystem walk // since we will only be processing one file from a temp directory @@ -197,7 +197,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { } walkPaths := func() error { - defer close(pathsCh) + defer close(pathCh) var idx int for idx < len(f.Paths) { @@ -205,7 +205,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { case <-ctx.Done(): return ctx.Err() default: - pathsCh <- f.Paths[idx] + pathCh <- f.Paths[idx] idx += 1 } } @@ -217,18 +217,18 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { eg.Go(walkPaths) } else { // no explicit paths to process, so we only need to process root - pathsCh <- f.TreeRoot - close(pathsCh) + pathCh <- f.TreeRoot + close(pathCh) } // create a filesystem walker - wk, err := walker.New(walkerType, f.TreeRoot, f.GitAllFiles, pathsCh) + wk, err := walker.New(walkerType, f.TreeRoot, f.NoCache, pathCh) if err != nil { return fmt.Errorf("failed to create walker: %w", err) } - // close the files channel when we're done walking the file system - defer close(f.filesCh) + // close the file channel when we're done walking the file system + defer close(f.fileCh) // if no cache has been configured, or we are processing from stdin, we invoke the walker directly if f.NoCache || f.Stdin { @@ -239,7 +239,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { default: stats.Add(stats.Traversed, 1) stats.Add(stats.Emitted, 1) - f.filesCh <- file + f.fileCh <- file return nil } }) @@ -247,7 +247,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { // otherwise we pass the walker to the cache and have it generate files for processing based on whether or not // they have been added/changed since the last invocation - if err = cache.ChangeSet(ctx, wk, f.filesCh); err != nil { + if err = cache.ChangeSet(ctx, wk, f.fileCh); err != nil { return fmt.Errorf("failed to generate change set: %w", err) } return nil @@ -319,7 +319,7 @@ func (f *Format) applyFormatters(ctx context.Context) func() error { }() // iterate the files channel - for file := range f.filesCh { + for file := range f.fileCh { // first check if this file has been globally excluded if format.PathMatches(file.RelPath, f.globalExcludes) { diff --git a/walker/git.go b/walker/git.go index d58f3e29..bbf30652 100644 --- a/walker/git.go +++ b/walker/git.go @@ -16,10 +16,11 @@ import ( ) type gitWalker struct { - root string - paths chan string - repo *git.Repository - allFiles bool + root string + paths chan string + repo *git.Repository + + noCache bool relPathOffset int } @@ -45,16 +46,19 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { // cache in-memory whether a path is present in the git index var cache map[string]*index.Entry - hasChanged := func(entry *index.Entry, info os.FileInfo) bool { - if g.allFiles { - // user wants all files regardless of changes - return true - } - // we only want to emit files that have changed when compared with the index + // by default, we only emit files if they have changes when compared with the git index + emitFile := func(entry *index.Entry, info os.FileInfo) bool { // mod time comparison is done with EPOCH (second) precision as per the POSIX spec return entry.ModifiedAt.Truncate(time.Second) != info.ModTime().Truncate(time.Second) } + if g.noCache { + // emit all files in the index + emitFile = func(entry *index.Entry, info os.FileInfo) bool { + return true + } + } + for path := range g.paths { if path == g.root { @@ -77,8 +81,8 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to stat %s: %w", path, err) } - // skip processing if the file hasn't changed when compared with the index - if !hasChanged(entry, info) { + // skip processing if the file hasn't changed + if !emitFile(entry, info) { continue } @@ -102,7 +106,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { continue } - // otherwise we ensure the git index entries are cached and then check if they are in the git index + // otherwise we ensure the git index entries are cached and then check if the path is in the git index if cache == nil { cache = make(map[string]*index.Entry) for _, entry := range idx.Entries { @@ -135,7 +139,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { if entry, ok := cache[relPath]; !ok { log.Debugf("path %v not found in git index, skipping", path) return nil - } else if !hasChanged(entry, info) { + } else if !emitFile(entry, info) { log.Debugf("path %v has not changed, skipping", path) return nil } @@ -153,7 +157,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return nil } -func NewGit(root string, allFiles bool, paths chan string) (Walker, error) { +func NewGit( + root string, + noCache bool, + paths chan string, +) (Walker, error) { repo, err := git.PlainOpen(root) if err != nil { return nil, fmt.Errorf("failed to open git repo: %w", err) @@ -162,7 +170,7 @@ func NewGit(root string, allFiles bool, paths chan string) (Walker, error) { root: root, paths: paths, repo: repo, - allFiles: allFiles, + noCache: noCache, relPathOffset: len(root) + 1, }, nil } diff --git a/walker/walker.go b/walker/walker.go index d1e1f0e2..270854cf 100644 --- a/walker/walker.go +++ b/walker/walker.go @@ -56,12 +56,12 @@ type Walker interface { Walk(ctx context.Context, fn WalkFunc) error } -func New(walkerType Type, root string, gitAllFiles bool, pathsCh chan string) (Walker, error) { +func New(walkerType Type, root string, noCache bool, pathsCh chan string) (Walker, error) { switch walkerType { case Git: - return NewGit(root, gitAllFiles, pathsCh) + return NewGit(root, noCache, pathsCh) case Auto: - return Detect(root, gitAllFiles, pathsCh) + return Detect(root, noCache, pathsCh) case Filesystem: return NewFilesystem(root, pathsCh) default: @@ -69,9 +69,9 @@ func New(walkerType Type, root string, gitAllFiles bool, pathsCh chan string) (W } } -func Detect(root string, gitAllFiles bool, pathsCh chan string) (Walker, error) { +func Detect(root string, noCache bool, pathsCh chan string) (Walker, error) { // for now, we keep it simple and try git first, filesystem second - w, err := NewGit(root, gitAllFiles, pathsCh) + w, err := NewGit(root, noCache, pathsCh) if err == nil { return w, err }