Skip to content

Commit

Permalink
fixup! feat: improve formatter/config change detection
Browse files Browse the repository at this point in the history
Signed-off-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee committed Oct 17, 2024
1 parent 1bebe54 commit ac13194
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 391 deletions.
7 changes: 0 additions & 7 deletions cmd/format/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,6 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string)
return fmt.Errorf("failed to create composite formatter: %w", err)
}

if db != nil {
// compare formatters with the db, busting the cache if the formatters have changed
if err := formatter.BustCache(db); err != nil {
return fmt.Errorf("failed to compare formatters: %w", err)
}
}

// create a new walker for traversing the paths
walker, err := walk.NewCompositeReader(walkType, cfg.TreeRoot, paths, db, statz)
if err != nil {
Expand Down
14 changes: 7 additions & 7 deletions cmd/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 3,
stats.Formatted: 3,
stats.Formatted: 1,
stats.Changed: 0,
})

Expand All @@ -680,7 +680,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 3,
stats.Formatted: 3,
stats.Formatted: 2,
stats.Changed: 0,
})

Expand Down Expand Up @@ -710,7 +710,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 4,
stats.Formatted: 4,
stats.Formatted: 1,
stats.Changed: 0,
})

Expand All @@ -736,7 +736,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 4,
stats.Formatted: 4,
stats.Formatted: 1,
stats.Changed: 0,
})

Expand All @@ -750,7 +750,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 4,
stats.Formatted: 4,
stats.Formatted: 1,
stats.Changed: 0,
})

Expand All @@ -764,7 +764,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 2,
stats.Formatted: 2,
stats.Formatted: 0,
stats.Changed: 0,
})

Expand All @@ -789,7 +789,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
assertStats(t, as, statz, map[stats.Type]int{
stats.Traversed: 32,
stats.Matched: 1,
stats.Formatted: 1,
stats.Formatted: 0,
stats.Changed: 0,
})

Expand Down
166 changes: 86 additions & 80 deletions format/composite.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package format

import (
"bytes"
"cmp"
"context"
"crypto/sha256"
Expand All @@ -19,8 +20,6 @@ import (
"github.com/numtide/treefmt/config"
"github.com/numtide/treefmt/stats"
"github.com/numtide/treefmt/walk"
"github.com/numtide/treefmt/walk/cache"
bolt "go.etcd.io/bbolt"
"golang.org/x/sync/errgroup"
"mvdan.cc/sh/v3/expand"
)
Expand Down Expand Up @@ -52,36 +51,81 @@ func newBatchKey(formatters []*Formatter) batchKey {
return batchKey(strings.Join(components, batchKeySeparator))
}

// batchMap maintains a mapping between batchKey and a slice of pointers to walk.File, used to organize files into
// batches based on the sequence of formatters to be applied.
type batchMap map[batchKey][]*walk.File
type batcher struct {
batchSize int
batches map[batchKey][]*walk.File
formatters map[string]*Formatter

func formatterSortFunc(a, b *Formatter) int {
// sort by priority in ascending order
priorityA := a.Priority()
priorityB := b.Priority()
signatures map[batchKey][]byte
}

result := priorityA - priorityB
if result == 0 {
// formatters with the same priority are sorted lexicographically to ensure a deterministic outcome
result = cmp.Compare(a.Name(), b.Name())
func (b *batcher) formatSignature(key batchKey, matches []*Formatter) ([]byte, error) {
signature, ok := b.signatures[key]
if ok {
return signature, nil
}

return result
h := sha256.New()
for _, f := range matches {
if err := f.Hash(h); err != nil {
return nil, fmt.Errorf("failed to hash formatter %s: %w", f.Name(), err)
}
}

signature = h.Sum(nil)
b.signatures[key] = signature

return signature, nil
}

// Append adds a file to the batch corresponding to the given sequence of formatters and returns the updated batch.
func (b batchMap) Append(file *walk.File, matches []*Formatter) (key batchKey, batch []*walk.File) {
func (b *batcher) append(
file *walk.File,
matches []*Formatter,
) (appended bool, key batchKey, batch []*walk.File, err error) {
slices.SortFunc(matches, formatterSortFunc)

// construct a batch key based on the sequence of formatters
key = newBatchKey(matches)

// get format signature
formatSignature, err := b.formatSignature(key, matches)
if err != nil {
return false, "", nil, err
}

//
h := sha256.New()
h.Write(formatSignature)
h.Write([]byte(fmt.Sprintf("%v %v", file.Info.ModTime().Unix(), file.Info.Size())))

file.FormatSignature = h.Sum(nil)

if bytes.Equal(file.FormatSignature, file.CachedFormatSignature) {
return false, key, b.batches[key], nil
}

// append to the batch
b[key] = append(b[key], file)
b.batches[key] = append(b.batches[key], file)

return true, key, b.batches[key], nil
}

func (b *batcher) reset(key batchKey) {
b.batches[key] = make([]*walk.File, 0, b.batchSize)
}

func formatterSortFunc(a, b *Formatter) int {
// sort by priority in ascending order
priorityA := a.Priority()
priorityB := b.Priority()

// return the batch
return key, b[key]
result := priorityA - priorityB
if result == 0 {
// formatters with the same priority are sorted lexicographically to ensure a deterministic outcome
result = cmp.Compare(a.Name(), b.Name())
}

return result
}

// CompositeFormatter handles the application of multiple Formatter instances based on global excludes and individual
Expand All @@ -99,7 +143,7 @@ type CompositeFormatter struct {
formatters map[string]*Formatter

eg *errgroup.Group
batches batchMap
batcher *batcher

// formatError indicates if at least one formatting error occurred
formatError *atomic.Bool
Expand Down Expand Up @@ -216,16 +260,17 @@ func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) erro
// record there was a match
c.stats.Add(stats.Matched, 1)

// check if the file is new or has changed when compared to the cache entry
if file.Cache == nil || file.Cache.HasChanged(file.Info) {
// add this file to a batch and if it's full, apply formatters to the batch
if key, batch := c.batches.Append(file, matches); len(batch) == c.batchSize {
c.apply(ctx, newBatchKey(matches), batch)
// reset the batch
c.batches[key] = make([]*walk.File, 0, c.batchSize)
}
} else {
// no further processing to be done, append to the release list
appended, key, batch, batchErr := c.batcher.append(file, matches)
if batchErr != nil {
return fmt.Errorf("failed to append file to batch: %w", batchErr)
}

if len(batch) == c.batchSize {
c.apply(ctx, key, batch)
c.batcher.reset(key)
}

if !appended {
toRelease = append(toRelease, file)
}
}
Expand All @@ -247,7 +292,8 @@ func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) erro
// all formatters have completed their tasks. It returns an error if any formatting failures were detected.
func (c *CompositeFormatter) Close(ctx context.Context) error {
// flush any partial batches that remain
for key, batch := range c.batches {
// todo clean up
for key, batch := range c.batcher.batches {
if len(batch) > 0 {
c.apply(ctx, key, batch)
}
Expand Down Expand Up @@ -289,54 +335,6 @@ func (c *CompositeFormatter) Hash() (string, error) {
return hex.EncodeToString(h.Sum(nil)), nil
}

// BustCache compares the current Hash() of this formatter with the last entry stored in the db.
// If it has changed, we remove all the path entries, forcing a fresh cache state.
func (c *CompositeFormatter) BustCache(db *bolt.DB) error {
// determine current hash
currentHash, err := c.Hash()
if err != nil {
return fmt.Errorf("failed to hash formatter: %w", err)
}

hashKey := []byte("sha256")
bucketKey := []byte("formatters")

return db.Update(func(tx *bolt.Tx) error {
// get or create the formatters bucket
bucket, err := tx.CreateBucketIfNotExists(bucketKey)
if err != nil {
return fmt.Errorf("failed to create formatters bucket: %w", err)
}

// load the previous hash which might be nil
prevHash := bucket.Get(hashKey)

c.log.Debug(
"comparing formatter hash with db",
"prev_hash", string(prevHash),
"current_hash", currentHash,
)

// compare with the previous hash
// if they are different, delete all the path entries
if string(prevHash) != currentHash {
c.log.Debug("hash has changed, deleting all paths")

paths, err := cache.BucketPaths(tx)
if err != nil {
return fmt.Errorf("failed to get paths bucket from cache: %w", err)
}

if err = paths.DeleteAll(); err != nil {
return fmt.Errorf("failed to delete paths bucket: %w", err)
}
}

// save the latest hash
return bucket.Put(hashKey, []byte(currentHash))
})
}

func NewCompositeFormatter(
cfg *config.Config,
statz *stats.Stats,
Expand Down Expand Up @@ -380,6 +378,14 @@ func NewCompositeFormatter(
formatters[name] = formatter
}

//
batcher := batcher{
batchSize: batchSize,
formatters: formatters,
batches: make(map[batchKey][]*walk.File),
signatures: make(map[batchKey][]byte),
}

// create an errgroup for asynchronously formatting
eg := errgroup.Group{}
// we use a simple heuristic to avoid too much contention by limiting the concurrency to runtime.NumCPU()
Expand All @@ -396,8 +402,8 @@ func NewCompositeFormatter(
unmatchedLevel: unmatchedLevel,

eg: &eg,
batcher: &batcher,
formatters: formatters,
batches: make(batchMap),
formatError: new(atomic.Bool),
}, nil
}
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ require (
github.com/charmbracelet/log v0.4.0
github.com/gobwas/glob v0.2.3
github.com/otiai10/copy v1.14.0
github.com/rogpeppe/go-internal v1.12.0
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.19.0
github.com/stretchr/testify v1.9.0
github.com/vmihailenco/msgpack/v5 v5.4.1
go.etcd.io/bbolt v1.3.11
golang.org/x/sync v0.8.0
golang.org/x/sys v0.25.0
mvdan.cc/sh/v3 v3.9.0
)

Expand Down Expand Up @@ -43,11 +44,9 @@ require (
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/term v0.24.0 // indirect
golang.org/x/text v0.18.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
Expand Down
4 changes: 0 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0=
go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I=
go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=
Expand Down
9 changes: 3 additions & 6 deletions nix/packages/treefmt/gomod2nix.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ schema = 3
[mod."github.com/rivo/uniseg"]
version = "v0.4.7"
hash = "sha256-rDcdNYH6ZD8KouyyiZCUEy8JrjOQoAkxHBhugrfHjFo="
[mod."github.com/rogpeppe/go-internal"]
version = "v1.12.0"
hash = "sha256-qvDNCe3l84/LgrA8X4O15e1FeDcazyX91m9LmXGXX6M="
[mod."github.com/sagikazarmark/locafero"]
version = "v0.4.0"
hash = "sha256-7I1Oatc7GAaHgAqBFO6Tv4IbzFiYeU9bJAfJhXuWaXk="
Expand Down Expand Up @@ -100,12 +103,6 @@ schema = 3
[mod."github.com/subosito/gotenv"]
version = "v1.6.0"
hash = "sha256-LspbjTniiq2xAICSXmgqP7carwlNaLqnCTQfw2pa80A="
[mod."github.com/vmihailenco/msgpack/v5"]
version = "v5.4.1"
hash = "sha256-pDplX6xU6UpNLcFbO1pRREW5vCnSPvSU+ojAwFDv3Hk="
[mod."github.com/vmihailenco/tagparser/v2"]
version = "v2.0.0"
hash = "sha256-M9QyaKhSmmYwsJk7gkjtqu9PuiqZHSmTkous8VWkWY0="
[mod."go.etcd.io/bbolt"]
version = "v1.3.11"
hash = "sha256-SVWYZtE9TBgAo8xJSmo9DtSwuNa056N3zGvPLDJgiA8="
Expand Down
Loading

0 comments on commit ac13194

Please sign in to comment.