Skip to content

Commit

Permalink
fixup! feat: only emit changed files with git walker
Browse files Browse the repository at this point in the history
Signed-off-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee committed Jul 10, 2024
1 parent a0f479c commit 22718bc
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 37 deletions.
3 changes: 1 addition & 2 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ type Format struct {
TreeRoot string `type:"existingdir" xor:"tree-root" help:"The root directory from which treefmt will start walking the filesystem (defaults to the directory containing the config file)."`
TreeRootFile string `type:"string" xor:"tree-root" help:"File to search for to find the project root (if --tree-root is not passed)."`
Walk walker.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."`
GitAllFiles bool `default:"false" help:"Forces the git walker to ignore change tracking and emit all files for processing instead of only those that have changed."`

Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."`
Version bool `name:"version" short:"V" help:"Print version."`
Expand All @@ -42,7 +41,7 @@ type Format struct {
formatters map[string]*format.Formatter
globalExcludes []glob.Glob

filesCh chan *walker.File
fileCh chan *walker.File
formattedCh chan *walker.File
processedCh chan *walker.File
}
Expand Down
28 changes: 14 additions & 14 deletions cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ func (f *Format) Run() (err error) {

// create a channel for files needing to be processed
// we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine
f.filesCh = make(chan *walker.File, BatchSize*runtime.NumCPU())
f.fileCh = make(chan *walker.File, BatchSize*runtime.NumCPU())

// create a channel for files that have been formatted
f.formattedCh = make(chan *walker.File, cap(f.filesCh))
f.formattedCh = make(chan *walker.File, cap(f.fileCh))

// create a channel for files that have been processed
f.processedCh = make(chan *walker.File, cap(f.filesCh))
f.processedCh = make(chan *walker.File, cap(f.fileCh))

// start concurrent processing tasks in reverse order
eg.Go(f.updateCache(ctx))
Expand All @@ -168,7 +168,7 @@ func (f *Format) Run() (err error) {
func (f *Format) walkFilesystem(ctx context.Context) func() error {
return func() error {
eg, ctx := errgroup.WithContext(ctx)
pathsCh := make(chan string, BatchSize)
pathCh := make(chan string, BatchSize)

// By default, we use the cli arg, but if the stdin flag has been set we force a filesystem walk
// since we will only be processing one file from a temp directory
Expand Down Expand Up @@ -197,15 +197,15 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error {
}

walkPaths := func() error {
defer close(pathsCh)
defer close(pathCh)

var idx int
for idx < len(f.Paths) {
select {
case <-ctx.Done():
return ctx.Err()
default:
pathsCh <- f.Paths[idx]
pathCh <- f.Paths[idx]
idx += 1
}
}
Expand All @@ -217,18 +217,18 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error {
eg.Go(walkPaths)
} else {
// no explicit paths to process, so we only need to process root
pathsCh <- f.TreeRoot
close(pathsCh)
pathCh <- f.TreeRoot
close(pathCh)
}

// create a filesystem walker
wk, err := walker.New(walkerType, f.TreeRoot, f.GitAllFiles, pathsCh)
wk, err := walker.New(walkerType, f.TreeRoot, f.NoCache, pathCh)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}

// close the files channel when we're done walking the file system
defer close(f.filesCh)
// close the file channel when we're done walking the file system
defer close(f.fileCh)

// if no cache has been configured, or we are processing from stdin, we invoke the walker directly
if f.NoCache || f.Stdin {
Expand All @@ -239,15 +239,15 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error {
default:
stats.Add(stats.Traversed, 1)
stats.Add(stats.Emitted, 1)
f.filesCh <- file
f.fileCh <- file
return nil
}
})
}

// otherwise we pass the walker to the cache and have it generate files for processing based on whether or not
// they have been added/changed since the last invocation
if err = cache.ChangeSet(ctx, wk, f.filesCh); err != nil {
if err = cache.ChangeSet(ctx, wk, f.fileCh); err != nil {
return fmt.Errorf("failed to generate change set: %w", err)
}
return nil
Expand Down Expand Up @@ -319,7 +319,7 @@ func (f *Format) applyFormatters(ctx context.Context) func() error {
}()

// iterate the files channel
for file := range f.filesCh {
for file := range f.fileCh {

// first check if this file has been globally excluded
if format.PathMatches(file.RelPath, f.globalExcludes) {
Expand Down
40 changes: 24 additions & 16 deletions walker/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ import (
)

type gitWalker struct {
root string
paths chan string
repo *git.Repository
allFiles bool
root string
paths chan string
repo *git.Repository

noCache bool
relPathOffset int
}

Expand All @@ -45,16 +46,19 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
// cache in-memory whether a path is present in the git index
var cache map[string]*index.Entry

hasChanged := func(entry *index.Entry, info os.FileInfo) bool {
if g.allFiles {
// user wants all files regardless of changes
return true
}
// we only want to emit files that have changed when compared with the index
// by default, we only emit files if they have changes when compared with the git index
emitFile := func(entry *index.Entry, info os.FileInfo) bool {
// mod time comparison is done with EPOCH (second) precision as per the POSIX spec
return entry.ModifiedAt.Truncate(time.Second) != info.ModTime().Truncate(time.Second)
}

if g.noCache {
// emit all files in the index
emitFile = func(entry *index.Entry, info os.FileInfo) bool {
return true
}
}

for path := range g.paths {

if path == g.root {
Expand All @@ -77,8 +81,8 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
return fmt.Errorf("failed to stat %s: %w", path, err)
}

// skip processing if the file hasn't changed when compared with the index
if !hasChanged(entry, info) {
// skip processing if the file hasn't changed
if !emitFile(entry, info) {
continue
}

Expand All @@ -102,7 +106,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
continue
}

// otherwise we ensure the git index entries are cached and then check if they are in the git index
// otherwise we ensure the git index entries are cached and then check if the path is in the git index
if cache == nil {
cache = make(map[string]*index.Entry)
for _, entry := range idx.Entries {
Expand Down Expand Up @@ -135,7 +139,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
if entry, ok := cache[relPath]; !ok {
log.Debugf("path %v not found in git index, skipping", path)
return nil
} else if !hasChanged(entry, info) {
} else if !emitFile(entry, info) {
log.Debugf("path %v has not changed, skipping", path)
return nil
}
Expand All @@ -153,7 +157,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
return nil
}

func NewGit(root string, allFiles bool, paths chan string) (Walker, error) {
func NewGit(
root string,
noCache bool,
paths chan string,
) (Walker, error) {
repo, err := git.PlainOpen(root)
if err != nil {
return nil, fmt.Errorf("failed to open git repo: %w", err)
Expand All @@ -162,7 +170,7 @@ func NewGit(root string, allFiles bool, paths chan string) (Walker, error) {
root: root,
paths: paths,
repo: repo,
allFiles: allFiles,
noCache: noCache,
relPathOffset: len(root) + 1,
}, nil
}
10 changes: 5 additions & 5 deletions walker/walker.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,22 @@ type Walker interface {
Walk(ctx context.Context, fn WalkFunc) error
}

func New(walkerType Type, root string, gitAllFiles bool, pathsCh chan string) (Walker, error) {
func New(walkerType Type, root string, noCache bool, pathsCh chan string) (Walker, error) {
switch walkerType {
case Git:
return NewGit(root, gitAllFiles, pathsCh)
return NewGit(root, noCache, pathsCh)
case Auto:
return Detect(root, gitAllFiles, pathsCh)
return Detect(root, noCache, pathsCh)
case Filesystem:
return NewFilesystem(root, pathsCh)
default:
return nil, fmt.Errorf("unknown walker type: %v", walkerType)
}
}

func Detect(root string, gitAllFiles bool, pathsCh chan string) (Walker, error) {
func Detect(root string, noCache bool, pathsCh chan string) (Walker, error) {
// for now, we keep it simple and try git first, filesystem second
w, err := NewGit(root, gitAllFiles, pathsCh)
w, err := NewGit(root, noCache, pathsCh)
if err == nil {
return w, err
}
Expand Down

0 comments on commit 22718bc

Please sign in to comment.