diff --git a/cache/cache.go b/cache/cache.go index fe1c1ac2..b6703cba 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -1,293 +1,196 @@ package cache import ( - "context" "crypto/sha1" "encoding/hex" "fmt" "os" - "runtime" "time" - "git.numtide.com/numtide/treefmt/stats" - - "git.numtide.com/numtide/treefmt/format" - "git.numtide.com/numtide/treefmt/walk" - - "github.com/charmbracelet/log" - "github.com/adrg/xdg" + "github.com/charmbracelet/log" "github.com/vmihailenco/msgpack/v5" bolt "go.etcd.io/bbolt" ) const ( - pathsBucket = "paths" - formattersBucket = "formatters" + bucketPaths = "paths" + bucketFormatters = "formatters" ) -// Entry represents a cache entry, indicating the last size and modified time for a file path. type Entry struct { Size int64 Modified time.Time } -var ( - db *bolt.DB - logger *log.Logger - - ReadBatchSize = 1024 * runtime.NumCPU() -) - -// Open creates an instance of bolt.DB for a given treeRoot path. -// If clean is true, Open will delete any existing data in the cache. -// -// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/.db`, where is determined by hashing -// the treeRoot path. This associates a given treeRoot with a given instance of the cache. -func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) { - logger = log.WithPrefix("cache") +type Bucket[V any] struct { + bucket *bolt.Bucket +} - // determine a unique and consistent db name for the tree root - h := sha1.New() - h.Write([]byte(treeRoot)) - digest := h.Sum(nil) +func (b *Bucket[V]) Size() int { + return b.bucket.Stats().KeyN +} - name := hex.EncodeToString(digest) - path, err := xdg.CacheFile(fmt.Sprintf("treefmt/eval-cache/%v.db", name)) - if err != nil { - return fmt.Errorf("could not resolve local path for the cache: %w", err) +func (b *Bucket[V]) Get(key string) (*V, error) { + bytes := b.bucket.Get([]byte(key)) + if bytes == nil { + return nil, nil + } + var value V + if err := msgpack.Unmarshal(bytes, &value); err != nil { + return nil, fmt.Errorf("failed to unmarshal cache entry for key '%v': %w", key, err) } + return &value, nil +} - db, err = bolt.Open(path, 0o600, nil) - if err != nil { - return fmt.Errorf("failed to open cache at %v: %w", path, err) +func (b *Bucket[V]) Put(key string, value *V) error { + if bytes, err := msgpack.Marshal(value); err != nil { + return fmt.Errorf("failed to marshal cache entry for key %v: %w", key, err) + } else if err = b.bucket.Put([]byte(key), bytes); err != nil { + return fmt.Errorf("failed to put cache entry for key %v: %w", key, err) } + return nil +} - err = db.Update(func(tx *bolt.Tx) error { - // create bucket for tracking paths - pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket)) - if err != nil { - return fmt.Errorf("failed to create paths bucket: %w", err) - } +func (b *Bucket[V]) Delete(key string) error { + return b.bucket.Delete([]byte(key)) +} - // create bucket for tracking formatters - formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket)) - if err != nil { - return fmt.Errorf("failed to create formatters bucket: %w", err) +func (b *Bucket[V]) DeleteAll() error { + c := b.bucket.Cursor() + for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() { + if err := c.Delete(); err != nil { + return fmt.Errorf("failed to remove cache entry for key %s: %w", string(k), err) } + } + return nil +} - // check for any newly configured or modified formatters - for name, formatter := range formatters { - - stat, err := os.Lstat(formatter.Executable()) - if err != nil { - return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err) - } - - entry, err := getEntry(formattersBucket, name) - if err != nil { - return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err) - } - - isNew := entry == nil - hasChanged := entry != nil && !(entry.Size == stat.Size() && entry.Modified == stat.ModTime()) - - if isNew { - logger.Debugf("formatter '%s' is new", name) - } else if hasChanged { - logger.Debug("formatter '%s' has changed", - name, - "size", stat.Size(), - "modTime", stat.ModTime(), - "cachedSize", entry.Size, - "cachedModTime", entry.Modified, - ) - } - - // update overall clean flag - clean = clean || isNew || hasChanged - - // record formatters info - entry = &Entry{ - Size: stat.Size(), - Modified: stat.ModTime(), - } - - if err = putEntry(formattersBucket, name, entry); err != nil { - return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err) - } +func (b *Bucket[V]) ForEach(f func(string, *V) error) error { + return b.bucket.ForEach(func(key, bytes []byte) error { + var value V + if err := msgpack.Unmarshal(bytes, &value); err != nil { + return fmt.Errorf("failed to unmarshal cache entry for key '%v': %w", key, err) } + return f(string(key), &value) + }) +} - // check for any removed formatters - if err = formattersBucket.ForEach(func(key []byte, _ []byte) error { - _, ok := formatters[string(key)] - if !ok { - // remove the formatter entry from the cache - if err = formattersBucket.Delete(key); err != nil { - return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err) - } - // indicate a clean is required - clean = true - } - return nil - }); err != nil { - return fmt.Errorf("failed to check cache for removed formatters: %w", err) - } +type Tx struct { + tx *bolt.Tx +} - if clean { - // remove all path entries - c := pathsBucket.Cursor() - for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() { - if err = c.Delete(); err != nil { - return fmt.Errorf("failed to remove path entry: %w", err) - } - } - } +func (t *Tx) Commit() error { + return t.tx.Commit() +} - return nil - }) +func (t *Tx) Rollback() error { + return t.tx.Rollback() +} - return +func (t *Tx) Paths() (*Bucket[Entry], error) { + return t.cacheBucket(bucketPaths) } -// Close closes any open instance of the cache. -func Close() error { - if db == nil { - return nil - } - return db.Close() +func (t *Tx) Formatters() (*Bucket[Entry], error) { + return t.cacheBucket(bucketFormatters) } -// getEntry is a helper for reading cache entries from bolt. -func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) { - b := bucket.Get([]byte(path)) - if b != nil { - var cached Entry - if err := msgpack.Unmarshal(b, &cached); err != nil { - return nil, fmt.Errorf("failed to unmarshal cache info for path '%v': %w", path, err) - } - return &cached, nil +func (t *Tx) cacheBucket(name string) (*Bucket[Entry], error) { + var b *bolt.Bucket + var err error + if t.tx.Writable() { + b, err = t.tx.CreateBucketIfNotExists([]byte(name)) } else { - return nil, nil + b = t.tx.Bucket([]byte(name)) } -} - -// putEntry is a helper for writing cache entries into bolt. -func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error { - bytes, err := msgpack.Marshal(entry) if err != nil { - return fmt.Errorf("failed to marshal cache path %v: %w", path, err) + return nil, fmt.Errorf("failed to get/create bucket %s: %w", bucketPaths, err) } - - if err = bucket.Put([]byte(path), bytes); err != nil { - return fmt.Errorf("failed to put cache path %v: %w", path, err) - } - return nil + return &Bucket[Entry]{b}, nil } -// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh. -// It determines if a path is new or has changed by comparing against cache entries. -func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.File) error { - start := time.Now() +type Cache struct { + db *bolt.DB + Temporary bool +} - defer func() { - logger.Debugf("finished generating change set in %v", time.Since(start)) - }() +func (c *Cache) BeginTx(writeable bool) (*Tx, error) { + tx, err := c.db.Begin(writeable) + return &Tx{tx}, err +} - var tx *bolt.Tx - var bucket *bolt.Bucket - var processed int +func (c *Cache) View(f func(*Tx) error) error { + return c.db.View(func(tx *bolt.Tx) error { + return f(&Tx{tx}) + }) +} - defer func() { - // close any pending read tx - if tx != nil { - _ = tx.Rollback() - } - }() +func (c *Cache) Update(f func(*Tx) error) error { + return c.db.Update(func(tx *bolt.Tx) error { + return f(&Tx{tx}) + }) +} - return walker.Walk(ctx, func(file *walk.File, err error) error { - select { - case <-ctx.Done(): - return ctx.Err() - default: - if err != nil { - return fmt.Errorf("failed to walk path: %w", err) - } else if file.Info.IsDir() { - // ignore directories - return nil - } - } +func (c *Cache) Close() error { + path := c.db.Path() - // open a new read tx if there isn't one in progress - // we have to periodically open a new read tx to prevent writes from being blocked - if tx == nil { - tx, err = db.Begin(false) - if err != nil { - return fmt.Errorf("failed to open a new cache read tx: %w", err) + // if this is a temporary cache instance, clean up the db file after closing + if c.Temporary { + defer func() { + if err := os.Remove(path); err != nil { + log.Errorf("failed to remove temporary cache file %s: %v", path, err) } - bucket = tx.Bucket([]byte(pathsBucket)) - } - - cached, err := getEntry(bucket, file.RelPath) - if err != nil { - return err - } - - changedOrNew := cached == nil || !(cached.Modified == file.Info.ModTime() && cached.Size == file.Info.Size()) - - stats.Add(stats.Traversed, 1) - if !changedOrNew { - // no change - return nil - } + log.Debugf("successfully removed temporary cache file %s", path) + }() + } - stats.Add(stats.Emitted, 1) + return c.db.Close() +} - // pass on the path - select { - case <-ctx.Done(): - return ctx.Err() - default: - filesCh <- file +func Open(treeRoot string, noCache bool) (*Cache, error) { + var err error + var path string + + if noCache { + // If no cache is desired, rather than complicate the logic for cache / no cache, we just create a cache instance + // backed by a temporary file instead + if f, err := os.CreateTemp("", "treefmt-no-cache-*.db"); err != nil { + return nil, fmt.Errorf("failed to create a temporary db file: %w", err) + } else { + path = f.Name() } - - // close the current tx if we have reached the batch size - processed += 1 - if processed == ReadBatchSize { - err = tx.Rollback() - tx = nil - return err + } else { + // Otherwise, the database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/.db`, where is + // determined by hashing the treeRoot path. + // This associates a given treeRoot with a given instance of the cache. + h := sha1.New() + h.Write([]byte(treeRoot)) + digest := h.Sum(nil) + + name := hex.EncodeToString(digest) + if path, err = xdg.CacheFile(fmt.Sprintf("treefmt/eval-cache/%v.db", name)); err != nil { + return nil, fmt.Errorf("could not resolve local path for the cache: %w", err) } - - return nil - }) -} - -// Update is used to record updated cache information for the specified list of paths. -func Update(files []*walk.File) error { - start := time.Now() - defer func() { - logger.Debugf("finished processing %v paths in %v", len(files), time.Since(start)) - }() - - if len(files) == 0 { - return nil } - return db.Update(func(tx *bolt.Tx) error { - bucket := tx.Bucket([]byte(pathsBucket)) + // open db + db, err := bolt.Open(path, 0o600, nil) + if err != nil { + return nil, err + } - for _, f := range files { - entry := Entry{ - Size: f.Info.Size(), - Modified: f.Info.ModTime(), - } + c := &Cache{ + db: db, + Temporary: noCache, + } - if err := putEntry(bucket, f.RelPath, &entry); err != nil { - return err - } + // force creation of buckets if they don't already exist + return c, c.Update(func(tx *Tx) error { + if _, err := tx.Paths(); err != nil { + return err } - - return nil + _, err = tx.Formatters() + return err }) } diff --git a/cache/cache_test.go b/cache/cache_test.go new file mode 100644 index 00000000..92cd5f30 --- /dev/null +++ b/cache/cache_test.go @@ -0,0 +1,166 @@ +package cache + +import ( + "os" + "strings" + "testing" + "time" + + "github.com/adrg/xdg" + "github.com/stretchr/testify/require" +) + +func TestCache_Open(t *testing.T) { + as := require.New(t) + + tempDir := t.TempDir() + xdgPrefix, err := xdg.CacheFile("") + as.NoError(err) + + // normal open + cache, err := Open(tempDir, false) + path := cache.db.Path() + + as.NoError(err) + as.True( + strings.HasPrefix(path, xdgPrefix), + "db path %s does not contain the xdg cache file prefix %s", + path, xdgPrefix, + ) + + // normal close + as.NoError(cache.Close()) + _, err = os.Stat(path) + as.NoError(err, "db path %s should still exist after closing the cache", path) + + // open a temp cache e.g. --no-cache + tempDir = t.TempDir() + cache, err = Open(tempDir, true) + as.NoError(err) + + // close temp cache + as.NoError(cache.Close()) + _, err = os.Stat(cache.db.Path()) + as.ErrorIs(err, os.ErrNotExist, "temp db path %s should not exist after closing the cache") +} + +func TestCache_Update(t *testing.T) { + as := require.New(t) + + cache, err := Open(t.TempDir(), false) + as.NoError(err) + + now := time.Now() + + testData := map[string]map[string]*Entry{ + "paths": { + "foo": {Size: 0, Modified: now}, + "bar": {Size: 1, Modified: now.Add(-1 * time.Second)}, + "fizz/buzz": {Size: 1 << 16, Modified: now.Add(-1 * time.Minute)}, + }, + "formatters": { + "bla": {Size: 1 << 32, Modified: now.Add(-1 * time.Hour)}, + "foo/bar/baz": {Size: 1 << 24, Modified: now.Add(-24 * time.Hour)}, + }, + } + + putEntries := func(bucket *Bucket[Entry], err error) func(string) { + return func(name string) { + as.NoError(err) + for k, v := range testData[name] { + as.NoError(bucket.Put(k, v), "failed to put value") + } + } + } + + getEntries := func(bucket *Bucket[Entry], err error) func(string) { + return func(name string) { + as.NoError(err) + as.Equal(len(testData[name]), bucket.Size()) + for k, v := range testData[name] { + actual, err := bucket.Get(k) + as.NoError(err) + as.EqualExportedValues(*v, *actual) + } + } + } + + modifyEntries := func(bucket *Bucket[Entry], err error) func(string) { + return func(name string) { + entries := testData[name] + idx := 0 + for k := range entries { + switch idx { + case 0: + // delete the first entry + as.NoError(bucket.Delete(k)) + delete(entries, k) + case 1: + // change the second + entries[k] = &Entry{Size: 123, Modified: now.Add(-2 * time.Hour)} + as.NoError(bucket.Put(k, entries[k])) + default: + break + } + } + } + } + + clearEntries := func(bucket *Bucket[Entry], err error) { + as.NoError(err) + as.NoError(bucket.DeleteAll()) + } + + checkEmpty := func(bucket *Bucket[Entry], err error) { + as.NoError(err) + as.Equal(0, bucket.Size()) + } + + // insert the test data into the cache + err = cache.Update(func(tx *Tx) error { + putEntries(tx.Paths())("paths") + putEntries(tx.Formatters())("formatters") + return nil + }) + as.NoError(err) + + // read it back and check it matches + err = cache.View(func(tx *Tx) error { + getEntries(tx.Paths())("paths") + getEntries(tx.Formatters())("formatters") + return nil + }) + as.NoError(err) + + // delete and update some entries + err = cache.Update(func(tx *Tx) error { + modifyEntries(tx.Paths()) + modifyEntries(tx.Formatters()) + return nil + }) + as.NoError(err) + + // read them back and make sure they match the updated test data + err = cache.View(func(tx *Tx) error { + getEntries(tx.Paths())("paths") + getEntries(tx.Formatters())("formatters") + return nil + }) + as.NoError(err) + + // delete all + err = cache.Update(func(tx *Tx) error { + clearEntries(tx.Paths()) + clearEntries(tx.Formatters()) + return nil + }) + as.NoError(err) + + // check the cache is empty + err = cache.Update(func(tx *Tx) error { + checkEmpty(tx.Paths()) + checkEmpty(tx.Formatters()) + return nil + }) + as.NoError(err) +} diff --git a/cli/cli.go b/cli/cli.go index 0d56cafb..fd3a00c9 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -1,12 +1,13 @@ package cli import ( + "git.numtide.com/numtide/treefmt/cache" "os" "github.com/gobwas/glob" "git.numtide.com/numtide/treefmt/format" - "git.numtide.com/numtide/treefmt/walk" + "git.numtide.com/numtide/treefmt/walker" "github.com/alecthomas/kong" "github.com/charmbracelet/log" ) @@ -25,10 +26,11 @@ type Format struct { Formatters []string `short:"f" help:"Specify formatters to apply. Defaults to all formatters."` TreeRoot string `type:"existingdir" xor:"tree-root" help:"The root directory from which treefmt will start walking the filesystem (defaults to the directory containing the config file)."` TreeRootFile string `type:"string" xor:"tree-root" help:"File to search for to find the project root (if --tree-root is not passed)."` - Walk walk.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."` - Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."` - Version bool `name:"version" short:"V" help:"Print version."` - Init bool `name:"init" short:"i" help:"Create a new treefmt.toml."` + Walk walker.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."` + + Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."` + Version bool `name:"version" short:"V" help:"Print version."` + Init bool `name:"init" short:"i" help:"Create a new treefmt.toml."` OnUnmatched log.Level `name:"on-unmatched" short:"u" default:"warn" help:"Log paths that did not match any formatters at the specified log level, with fatal exiting the process with an error. Possible values are ."` @@ -40,9 +42,12 @@ type Format struct { formatters map[string]*format.Formatter globalExcludes []glob.Glob - filesCh chan *walk.File - formattedCh chan *walk.File - processedCh chan *walk.File + cache *cache.Cache + + walker walker.Walker + fileCh chan *walker.File + formattedCh chan *walker.File + processedCh chan *walker.File } func (f *Format) configureLogging() { diff --git a/cli/format.go b/cli/format.go index 95ff37bc..c4f81ec3 100644 --- a/cli/format.go +++ b/cli/format.go @@ -13,12 +13,13 @@ import ( "syscall" "time" + "git.numtide.com/numtide/treefmt/cache" + "git.numtide.com/numtide/treefmt/format" "git.numtide.com/numtide/treefmt/stats" - "git.numtide.com/numtide/treefmt/cache" "git.numtide.com/numtide/treefmt/config" - "git.numtide.com/numtide/treefmt/walk" + "git.numtide.com/numtide/treefmt/walker" "github.com/charmbracelet/log" "golang.org/x/sync/errgroup" @@ -53,13 +54,6 @@ func (f *Format) Run() (err error) { // create a prefixed logger log.SetPrefix("format") - // ensure cache is closed on return - defer func() { - if err := cache.Close(); err != nil { - log.Errorf("failed to close cache: %v", err) - } - }() - // find the config file unless specified if f.ConfigFile == "" { pwd, err := os.Getwd() @@ -119,12 +113,37 @@ func (f *Format) Run() (err error) { f.formatters[name] = formatter } - // open the cache if configured - if !f.NoCache { - if err = cache.Open(f.TreeRoot, f.ClearCache, f.formatters); err != nil { - // if we can't open the cache, we log a warning and fallback to no cache - log.Warnf("failed to open cache: %v", err) - f.NoCache = true + // initialise the cache + f.cache, err = cache.Open(f.TreeRoot, f.NoCache) + if err != nil { + return fmt.Errorf("failed to open cache: %w", err) + } + + // ensure the cache is closed on shutdown + defer func() { + if err := f.cache.Close(); err != nil { + log.Errorf("failed to close cache: %v", err) + } + log.Debug("successfully closed cache") + }() + + // check if the formatters set has changed in any meaningful way + if err = format.CheckFormatters(f.cache, f.formatters); err != nil { + return fmt.Errorf("failed to check formatters: %w", err) + } + log.Debugf("formatters check complete") + + // clear out the paths bucket if desired before starting + if f.ClearCache { + err = f.cache.Update(func(tx *cache.Tx) error { + paths, err := tx.Paths() + if err != nil { + return err + } + return paths.DeleteAll() + }) + if err != nil { + return fmt.Errorf("failed to clear paths from cache: %w", err) } } @@ -147,13 +166,13 @@ func (f *Format) Run() (err error) { // create a channel for files needing to be processed // we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine - f.filesCh = make(chan *walk.File, BatchSize*runtime.NumCPU()) + f.fileCh = make(chan *walker.File, BatchSize*runtime.NumCPU()) // create a channel for files that have been formatted - f.formattedCh = make(chan *walk.File, cap(f.filesCh)) + f.formattedCh = make(chan *walker.File, cap(f.fileCh)) // create a channel for files that have been processed - f.processedCh = make(chan *walk.File, cap(f.filesCh)) + f.processedCh = make(chan *walker.File, cap(f.fileCh)) // start concurrent processing tasks in reverse order eg.Go(f.updateCache(ctx)) @@ -168,14 +187,14 @@ func (f *Format) Run() (err error) { func (f *Format) walkFilesystem(ctx context.Context) func() error { return func() error { eg, ctx := errgroup.WithContext(ctx) - pathsCh := make(chan string, BatchSize) + pathCh := make(chan string, BatchSize) // By default, we use the cli arg, but if the stdin flag has been set we force a filesystem walk // since we will only be processing one file from a temp directory walkerType := f.Walk if f.Stdin { - walkerType = walk.Filesystem + walkerType = walker.Filesystem // check we have only received one path arg which we use for the file extension / matching to formatters if len(f.Paths) != 1 { @@ -197,7 +216,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { } walkPaths := func() error { - defer close(pathsCh) + defer close(pathCh) var idx int for idx < len(f.Paths) { @@ -205,7 +224,7 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { case <-ctx.Done(): return ctx.Err() default: - pathsCh <- f.Paths[idx] + pathCh <- f.Paths[idx] idx += 1 } } @@ -217,40 +236,31 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { eg.Go(walkPaths) } else { // no explicit paths to process, so we only need to process root - pathsCh <- f.TreeRoot - close(pathsCh) + pathCh <- f.TreeRoot + close(pathCh) } // create a filesystem walker - walker, err := walk.New(walkerType, f.TreeRoot, pathsCh) + var err error + f.walker, err = walker.New(walkerType, f.TreeRoot, f.cache, pathCh) if err != nil { return fmt.Errorf("failed to create walker: %w", err) } - // close the files channel when we're done walking the file system - defer close(f.filesCh) - - // if no cache has been configured, or we are processing from stdin, we invoke the walker directly - if f.NoCache || f.Stdin { - return walker.Walk(ctx, func(file *walk.File, err error) error { - select { - case <-ctx.Done(): - return ctx.Err() - default: - stats.Add(stats.Traversed, 1) - stats.Add(stats.Emitted, 1) - f.filesCh <- file - return nil - } - }) - } + // close the file channel when we're done walking the file system + defer close(f.fileCh) - // otherwise we pass the walker to the cache and have it generate files for processing based on whether or not - // they have been added/changed since the last invocation - if err = cache.ChangeSet(ctx, walker, f.filesCh); err != nil { - return fmt.Errorf("failed to generate change set: %w", err) - } - return nil + // + return f.walker.Walk(ctx, func(file *walker.File, err error) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + stats.Add(stats.Emitted, 1) + f.fileCh <- file + return nil + } + }) } } @@ -319,7 +329,7 @@ func (f *Format) applyFormatters(ctx context.Context) func() error { }() // iterate the files channel - for file := range f.filesCh { + for file := range f.fileCh { // first check if this file has been globally excluded if format.PathMatches(file.RelPath, f.globalExcludes) { @@ -419,21 +429,21 @@ func (f *Format) detectFormatted(ctx context.Context) func() error { func (f *Format) updateCache(ctx context.Context) func() error { return func() error { // used to batch updates for more efficient txs - batch := make([]*walk.File, 0, BatchSize) + batch := make([]*walker.File, 0, BatchSize) // apply a batch processBatch := func() error { - // pass the batch to the cache for updating - if err := cache.Update(batch); err != nil { + // let the walker record updated path info + if err := f.walker.UpdatePaths(batch); err != nil { return err } + // reset the batch batch = batch[:0] return nil } // if we are processing from stdin that means we are outputting to stdout, no caching involved - // if f.NoCache is set that means either the user explicitly disabled the cache or we failed to open on - if f.Stdin || f.NoCache { + if f.Stdin { // do nothing processBatch = func() error { return nil } } diff --git a/cli/format_test.go b/cli/format_test.go index 1d6d75dd..11a77e20 100644 --- a/cli/format_test.go +++ b/cli/format_test.go @@ -412,7 +412,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { } test.WriteConfig(t, configPath, cfg) - args := []string{"--config-file", configPath, "--tree-root", tempDir} + args := []string{"--config-file", configPath, "--tree-root", tempDir, "-vv"} _, err := cmd(t, args...) as.NoError(err) assertStats(t, as, 32, 32, 3, 0) @@ -425,8 +425,8 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, 32, 32, 3, 0) // check cache is working - _, err = cmd(t, args...) - as.NoError(err) + out, err := cmd(t, args...) + as.NoError(err, string(out)) assertStats(t, as, 32, 0, 0, 0) // tweak mod time of python formatter diff --git a/format/cache.go b/format/cache.go new file mode 100644 index 00000000..51278015 --- /dev/null +++ b/format/cache.go @@ -0,0 +1,93 @@ +package format + +import ( + "fmt" + "git.numtide.com/numtide/treefmt/cache" + "github.com/charmbracelet/log" + "os" +) + +func CheckFormatters(c *cache.Cache, formatters map[string]*Formatter) error { + return c.Update(func(tx *cache.Tx) error { + + clearPaths := false + + pathsBucket, err := tx.Paths() + if err != nil { + return fmt.Errorf("failed to get paths bucket from cache: %w", err) + } + + formattersBucket, err := tx.Formatters() + if err != nil { + return fmt.Errorf("failed to get formatters bucket from cache: %w", err) + } + + // check for any newly configured or modified formatters + for name, formatter := range formatters { + + stat, err := os.Lstat(formatter.Executable()) + if err != nil { + return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err) + } + + entry, err := formattersBucket.Get(name) + if err != nil { + return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err) + } + + isNew := entry == nil + hasChanged := entry != nil && !(entry.Size == stat.Size() && entry.Modified == stat.ModTime()) + + if isNew { + log.Debugf("formatter '%s' is new", name) + } else if hasChanged { + log.Debug("formatter '%s' has changed", + name, + "size", stat.Size(), + "modTime", stat.ModTime(), + "cachedSize", entry.Size, + "cachedModTime", entry.Modified, + ) + } + + // update overall flag + clearPaths = clearPaths || isNew || hasChanged + + // record formatters info + entry = &cache.Entry{ + Size: stat.Size(), + Modified: stat.ModTime(), + } + + if err = formattersBucket.Put(name, entry); err != nil { + return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err) + } + } + + // check for any removed formatters + if err = formattersBucket.ForEach(func(key string, _ *cache.Entry) error { + _, ok := formatters[key] + if !ok { + // remove the formatter entry from the cache + if err = formattersBucket.Delete(key); err != nil { + return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err) + } + // indicate a clean is required + clearPaths = true + } + return nil + }); err != nil { + return fmt.Errorf("failed to check cache for removed formatters: %w", err) + } + + if clearPaths { + // remove all path entries + if err := pathsBucket.DeleteAll(); err != nil { + return fmt.Errorf("failed to remove all path entries from cache: %w", err) + } + } + + return nil + }) + +} diff --git a/format/formatter.go b/format/formatter.go index 36687393..4e611f44 100644 --- a/format/formatter.go +++ b/format/formatter.go @@ -8,7 +8,7 @@ import ( "os/exec" "time" - "git.numtide.com/numtide/treefmt/walk" + "git.numtide.com/numtide/treefmt/walker" "git.numtide.com/numtide/treefmt/config" @@ -89,7 +89,7 @@ func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error { // Wants is used to test if a Formatter wants a path based on it's configured Includes and Excludes patterns. // Returns true if the Formatter should be applied to path, false otherwise. -func (f *Formatter) Wants(file *walk.File) bool { +func (f *Formatter) Wants(file *walker.File) bool { match := !PathMatches(file.RelPath, f.excludes) && PathMatches(file.RelPath, f.includes) if match { f.log.Debugf("match: %v", file) diff --git a/format/task.go b/format/task.go index 062b4544..81d16602 100644 --- a/format/task.go +++ b/format/task.go @@ -4,16 +4,16 @@ import ( "cmp" "slices" - "git.numtide.com/numtide/treefmt/walk" + "git.numtide.com/numtide/treefmt/walker" ) type Task struct { - File *walk.File + File *walker.File Formatters []*Formatter BatchKey string } -func NewTask(file *walk.File, formatters []*Formatter) Task { +func NewTask(file *walker.File, formatters []*Formatter) Task { // sort by priority in ascending order slices.SortFunc(formatters, func(a, b *Formatter) int { priorityA := a.Priority() diff --git a/go.sum b/go.sum index 60fc1785..1f0c0f64 100644 --- a/go.sum +++ b/go.sum @@ -46,12 +46,8 @@ github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+ github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= -github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= -github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= github.com/go-git/go-git/v5 v5.12.1-0.20240409060936-cd6633c3c665 h1:+M6D6MplKwRqmw8b41MArUGFsGTnl24+/S40I0yrhKs= github.com/go-git/go-git/v5 v5.12.1-0.20240409060936-cd6633c3c665/go.mod h1:QZbSbsaXQD7v0yvddAhVE2UfW5wGmeqHQ0UnOSr6JYQ= -github.com/go-git/go-git/v5 v5.12.1-0.20240516215126-9cc340a7fc5c h1:Pyuh3Y6kb/+k7yl5nLN6pcQ+7isiJ1PnBj3QSUH1hbQ= -github.com/go-git/go-git/v5 v5.12.1-0.20240516215126-9cc340a7fc5c/go.mod h1:Tzg+feu/PVazFrRFtWHxfvErT+p+Zo3Yg0nzEhJdxW4= github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= @@ -120,8 +116,6 @@ github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= -go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -129,12 +123,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f h1:99ci1mjWVBWwJiEKYY6jWa4d2nTQVIEhZIptnrVb1XY= -golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -146,8 +136,6 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -169,8 +157,6 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -178,9 +164,8 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -188,9 +173,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/test/examples/ruby/bundler.rb b/test/examples/ruby/bundler.rb index 6a3a14fc..9b177459 100644 --- a/test/examples/ruby/bundler.rb +++ b/test/examples/ruby/bundler.rb @@ -155,11 +155,11 @@ def specs_path end def cache - bundle_path.join("cache/bundler") + bundle_path.join("caching/bundler") end def user_cache - user_bundle_path.join("cache") + user_bundle_path.join("caching") end def root diff --git a/walk/filesystem.go b/walk/filesystem.go deleted file mode 100644 index 729a4972..00000000 --- a/walk/filesystem.go +++ /dev/null @@ -1,69 +0,0 @@ -package walk - -import ( - "context" - "fmt" - "io/fs" - "os" - "path/filepath" -) - -type filesystemWalker struct { - root string - pathsCh chan string - relPathOffset int -} - -func (f filesystemWalker) Root() string { - return f.root -} - -func (f filesystemWalker) relPath(path string) (string, error) { - // quick optimization for the majority of use cases - if len(path) >= f.relPathOffset && path[:len(f.root)] == f.root { - return path[f.relPathOffset:], nil - } - // fallback to proper relative path resolution - return filepath.Rel(f.root, path) -} - -func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { - walkFn := func(path string, info fs.FileInfo, _ error) error { - if info == nil { - return fmt.Errorf("no such file or directory '%s'", path) - } - - // ignore directories and symlinks - if info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink { - return nil - } - - relPath, err := f.relPath(path) - if err != nil { - return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) - } - - file := File{ - Path: path, - RelPath: relPath, - Info: info, - } - return fn(&file, err) - } - - for path := range f.pathsCh { - if err := filepath.Walk(path, walkFn); err != nil { - return err - } - } - - return nil -} - -func NewFilesystem(root string, paths chan string) (Walker, error) { - return filesystemWalker{ - root: root, - pathsCh: paths, - relPathOffset: len(root) + 1, - }, nil -} diff --git a/walker/filesystem.go b/walker/filesystem.go new file mode 100644 index 00000000..a99d7252 --- /dev/null +++ b/walker/filesystem.go @@ -0,0 +1,144 @@ +package walker + +import ( + "context" + "fmt" + "git.numtide.com/numtide/treefmt/cache" + "git.numtide.com/numtide/treefmt/stats" + "io/fs" + "os" + "path/filepath" + "runtime" +) + +type filesystemWalker struct { + root string + cache *cache.Cache + pathsCh chan string + relPathOffset int +} + +func (f filesystemWalker) UpdatePaths(batch []*File) error { + if err := f.cache.Update(func(tx *cache.Tx) error { + // get the paths bucket + paths, err := tx.Paths() + if err != nil { + return err + } + for _, f := range batch { + entry := cache.Entry{ + Size: f.Info.Size(), + Modified: f.Info.ModTime(), + } + if err := paths.Put(f.RelPath, &entry); err != nil { + return err + } + } + + return nil + }); err != nil { + return fmt.Errorf("failed to update paths: %w", err) + } + return nil +} + +func (f filesystemWalker) Root() string { + return f.root +} + +func (f filesystemWalker) relPath(path string) (string, error) { + // quick optimization for the majority of use cases + if len(path) >= f.relPathOffset && path[:len(f.root)] == f.root { + return path[f.relPathOffset:], nil + } + // fallback to proper relative path resolution + return filepath.Rel(f.root, path) +} + +func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { + + var tx *cache.Tx + var paths *cache.Bucket[cache.Entry] + var processed int + batchSize := 1024 * runtime.NumCPU() + + defer func() { + // close any pending read tx + if tx != nil { + _ = tx.Rollback() + } + }() + + walkFn := func(path string, info fs.FileInfo, _ error) error { + if info == nil { + return fmt.Errorf("no such file or directory '%s'", path) + } + + // ignore directories and symlinks + if info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink { + return nil + } + + relPath, err := f.relPath(path) + if err != nil { + return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) + } + + file := File{ + Path: path, + RelPath: relPath, + Info: info, + } + + // open a new read tx if there isn't one in progress + // we have to periodically open a new read tx to prevent writes from being blocked + if tx == nil { + if tx, err = f.cache.BeginTx(false); err != nil { + return fmt.Errorf("failed to open a new cache read tx: %w", err) + } else if paths, err = tx.Paths(); err != nil { + return fmt.Errorf("failed to get paths bucket from cache tx: %w", err) + } + } + + cached, err := paths.Get(file.RelPath) + if err != nil { + return err + } + + // close the current tx if we have reached the batch size + processed += 1 + if processed == batchSize { + if err = tx.Rollback(); err != nil { + return err + } + tx = nil + } + + // + changedOrNew := cached == nil || !(cached.Modified == file.Info.ModTime() && cached.Size == file.Info.Size()) + + stats.Add(stats.Traversed, 1) + if !changedOrNew { + // no change + return nil + } + return fn(&file, err) + } + + for path := range f.pathsCh { + if err := filepath.Walk(path, walkFn); err != nil { + return err + } + } + + return nil +} + +func NewFilesystem(root string, cache *cache.Cache, paths chan string) (Walker, error) { + return filesystemWalker{ + root: root, + cache: cache, + pathsCh: paths, + relPathOffset: len(root) + 1, + }, nil +} diff --git a/walk/filesystem_test.go b/walker/filesystem_test.go similarity index 85% rename from walk/filesystem_test.go rename to walker/filesystem_test.go index 503ec0d3..c94755c7 100644 --- a/walk/filesystem_test.go +++ b/walker/filesystem_test.go @@ -1,10 +1,13 @@ -package walk +package walker import ( "context" "os" "testing" + "git.numtide.com/numtide/treefmt/cache" + "git.numtide.com/numtide/treefmt/stats" + "git.numtide.com/numtide/treefmt/test" "github.com/stretchr/testify/require" ) @@ -55,7 +58,12 @@ func TestFilesystemWalker_Walk(t *testing.T) { as := require.New(t) - walker, err := NewFilesystem(tempDir, paths) + stats.Init() + + c, err := cache.Open(t.TempDir(), false) + as.NoError(err, "failed to open cache") + + walker, err := NewFilesystem(tempDir, c, paths) as.NoError(err) idx := 0 diff --git a/walk/git.go b/walker/git.go similarity index 61% rename from walk/git.go rename to walker/git.go index 4dc91967..4b51c2e2 100644 --- a/walk/git.go +++ b/walker/git.go @@ -1,11 +1,16 @@ -package walk +package walker import ( "context" "fmt" + "git.numtide.com/numtide/treefmt/cache" + "git.numtide.com/numtide/treefmt/stats" "io/fs" "os" "path/filepath" + "time" + + "github.com/go-git/go-git/v5/plumbing/format/index" "github.com/charmbracelet/log" @@ -13,12 +18,19 @@ import ( ) type gitWalker struct { - root string - paths chan string - repo *git.Repository + root string + paths chan string + repo *git.Repository + + noCache bool relPathOffset int } +func (g gitWalker) UpdatePaths(_ []*File) error { + // nothing to do, git is doing all the tracking + return nil +} + func (g gitWalker) Root() string { return g.root } @@ -39,7 +51,20 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { } // cache in-memory whether a path is present in the git index - var cache map[string]bool + var indexCache map[string]*index.Entry + + // by default, we only emit files if they have changes when compared with the git index + emitFile := func(entry *index.Entry, info os.FileInfo) bool { + // mod time comparison is done with EPOCH (second) precision as per the POSIX spec + return entry.ModifiedAt.Truncate(time.Second) != info.ModTime().Truncate(time.Second) + } + + if g.noCache { + // emit all files in the index + emitFile = func(entry *index.Entry, info os.FileInfo) bool { + return true + } + } for path := range g.paths { @@ -63,6 +88,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to stat %s: %w", path, err) } + // skip processing if the file hasn't changed + if !emitFile(entry, info) { + continue + } + // determine a relative path relPath, err := g.relPath(path) if err != nil { @@ -75,6 +105,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { Info: info, } + stats.Add(stats.Traversed, 1) if err = fn(&file, err); err != nil { return err } @@ -83,11 +114,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { continue } - // otherwise we ensure the git index entries are cached and then check if they are in the git index - if cache == nil { - cache = make(map[string]bool) + // otherwise we ensure the git index entries are cached and then check if the path is in the git index + if indexCache == nil { + indexCache = make(map[string]*index.Entry) for _, entry := range idx.Entries { - cache[entry.Name] = true + indexCache[entry.Name] = entry } } @@ -96,14 +127,15 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to find relative path for %v: %w", path, err) } - _, ok := cache[relPath] + _, ok := indexCache[relPath] if !(path == g.root || ok) { log.Debugf("path %v not found in git index, skipping", path) continue } return filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error { - if info.IsDir() { + // ignore directories and symlinks + if info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink { return nil } @@ -112,9 +144,12 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) } - if _, ok := cache[relPath]; !ok { + if entry, ok := indexCache[relPath]; !ok { log.Debugf("path %v not found in git index, skipping", path) return nil + } else if !emitFile(entry, info) { + log.Debugf("path %v has not changed, skipping", path) + return nil } file := File{ @@ -123,6 +158,7 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { Info: info, } + stats.Add(stats.Traversed, 1) return fn(&file, err) }) } @@ -130,15 +166,26 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return nil } -func NewGit(root string, paths chan string) (Walker, error) { +func NewGit( + root string, + cache *cache.Cache, + paths chan string, +) (Walker, error) { repo, err := git.PlainOpen(root) if err != nil { return nil, fmt.Errorf("failed to open git repo: %w", err) } + + // immediately close the cache as we have no need for it + if err = cache.Close(); err != nil { + return nil, fmt.Errorf("failed to close cache: %w", err) + } + return &gitWalker{ root: root, paths: paths, repo: repo, + noCache: cache.Temporary, relPathOffset: len(root) + 1, }, nil } diff --git a/walk/walker.go b/walker/walker.go similarity index 74% rename from walk/walker.go rename to walker/walker.go index e3afb77f..84d4b361 100644 --- a/walk/walker.go +++ b/walker/walker.go @@ -1,8 +1,9 @@ -package walk +package walker import ( "context" "fmt" + "git.numtide.com/numtide/treefmt/cache" "io/fs" "os" "time" @@ -54,26 +55,33 @@ type WalkFunc func(file *File, err error) error type Walker interface { Root() string Walk(ctx context.Context, fn WalkFunc) error + UpdatePaths(batch []*File) error } -func New(walkerType Type, root string, pathsCh chan string) (Walker, error) { +func New( + walkerType Type, + root string, + cache *cache.Cache, + pathsCh chan string, +) (Walker, error) { + switch walkerType { case Git: - return NewGit(root, pathsCh) + return NewGit(root, cache, pathsCh) case Auto: - return Detect(root, pathsCh) + return Detect(root, cache, pathsCh) case Filesystem: - return NewFilesystem(root, pathsCh) + return NewFilesystem(root, cache, pathsCh) default: return nil, fmt.Errorf("unknown walker type: %v", walkerType) } } -func Detect(root string, pathsCh chan string) (Walker, error) { +func Detect(root string, cache *cache.Cache, pathsCh chan string) (Walker, error) { // for now, we keep it simple and try git first, filesystem second - w, err := NewGit(root, pathsCh) + w, err := NewGit(root, cache, pathsCh) if err == nil { return w, err } - return NewFilesystem(root, pathsCh) + return NewFilesystem(root, cache, pathsCh) }