Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: match before checking cache
Browse files Browse the repository at this point in the history
This changes the flow of processing to make unmatched behaviour more consistent.

Before, we had been:

- traversing the filesystem
- comparing with the cache and only emitting files which had changed
- applying the matching rules to determine which formatters should be applied to a given file
- applying the formatters

Now, we do the following:

- traverse the filesystem
- apply the matching rules to determine which formatters should be applied to a given file
- compare with the cache and only emit files which have changed for formatting
- apply the formatters

It does mean we are applying the matching rules against files which we may not have to format, but in testing against Nixpkgs the performance impact appears negligible.

This makes sense since most of the processing time will be spent in the formatters, not applying some globs to file paths.

In making this change, I have refined how the statistics work to make the naming and output make more sense.

Before we would output:

```
traversed 41273 files
emitted 41273 files for processing
formatted 34111 files (14338 changed) in 23.679s
```

Not we output:

```
traversed 43492 files
matched 36059 files
formatted 36059 files (19 changed) in 19.324s
```

Signed-off-by: Brian McGee <brian@bmcgee.ie>
brianmcgee committed Oct 13, 2024
1 parent ed8979e commit 0e8ffff
Showing 6 changed files with 500 additions and 92 deletions.
80 changes: 51 additions & 29 deletions cmd/format/format.go
Original file line number Diff line number Diff line change
@@ -205,33 +205,34 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string)
return fmt.Errorf("failed to create walker: %w", err)
}

// start traversing
files := make([]*walk.File, BatchSize)

for {
// read the next batch
ctx, cancel := context.WithTimeout(ctx, 1*time.Second)

n, err := reader.Read(ctx, files)

for idx := 0; idx < n; idx++ {
file := files[idx]
// ensure context is cancelled to release resources
cancel()

// check if this file is new or has changed when compared to the cache entry
if file.Cache == nil || file.Cache.HasChanged(file.Info) {
filesCh <- file
statz.Add(stats.Emitted, 1)
}
// pass each file into the file channel for processing
for idx := 0; idx < n; idx++ {
filesCh <- files[idx]
}

cancel()

if errors.Is(err, io.EOF) {
// we have finished traversing
break
} else if err != nil {
// something went wrong
log.Errorf("failed to read files: %v", err)
cancel()
break
}
}

// indicate no further files for processing
close(filesCh)

// wait for everything to complete
@@ -263,6 +264,8 @@ func applyFormatters(
// formatters which should be applied to their respective files
batches := make(map[string][]*format.Task)

// apply check if the given batch key has enough tasks to trigger processing
// flush is used to force processing regardless of the number of tasks
apply := func(key string, flush bool) {
// lookup the batch and exit early if it's empty
batch := batches[key]
@@ -304,6 +307,7 @@ func applyFormatters(
}
}

// tryApply batches tasks by their batch key and processes the batch if there is enough ready
tryApply := func(task *format.Task) {
// append to batch
key := task.BatchKey
@@ -314,25 +318,29 @@ func applyFormatters(

return func() error {
defer func() {
// close processed channel
// indicate processing has finished
close(formattedCh)
}()

// parse unmatched log level
unmatchedLevel, err := log.ParseLevel(cfg.OnUnmatched)
if err != nil {
return fmt.Errorf("invalid on-unmatched value: %w", err)
}

// iterate the files channel
// iterate the file channel
for file := range filesCh {

// first check if this file has been globally excluded
if format.PathMatches(file.RelPath, globalExcludes) {
log.Debugf("path matched global excludes: %s", file.RelPath)
// mark it as processed and continue to the next
formattedCh <- &format.Task{
File: file,

// release the file
if err := file.Release(); err != nil {
return fmt.Errorf("failed to release file: %w", err)
}

// continue to the next file
continue
}

@@ -344,21 +352,30 @@ func applyFormatters(
}
}

// see if any formatters matched
// check for no matches
if len(matches) == 0 {

// log that there was no match, exiting with an error if the unmatched level was set to fatal
if unmatchedLevel == log.FatalLevel {
return fmt.Errorf("no formatter for path: %s", file.RelPath)
}

log.Logf(unmatchedLevel, "no formatter for path: %s", file.RelPath)
// mark it as processed and continue to the next
formattedCh <- &format.Task{
File: file,

// release the file
if err := file.Release(); err != nil {
return fmt.Errorf("failed to release file: %w", err)
}
} else {
// record the match
statz.Add(stats.Matched, 1)
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full

// continue to the next file
continue
}

// record there was a match
statz.Add(stats.Matched, 1)

// check if this file is new or has changed when compared to the cache entry
if file.Cache == nil || file.Cache.HasChanged(file.Info) {
// if so, generate a format task, add it to the relevant batch (by batch key) and try to process
task := format.NewTask(file, matches)
tryApply(&task)
}
@@ -398,16 +415,21 @@ func postProcessing(
break LOOP
}

// check if the file has changed
// grab the underlying file reference
file := task.File

// check if the file has changed
changed, newInfo, err := file.Stat()
if err != nil {
return err
}

// record that a format occurred
statz.Add(stats.Formatted, 1)

if changed {
// record the change
statz.Add(stats.Formatted, 1)
// record that a change in the underlying file occurred
statz.Add(stats.Changed, 1)

logMethod := log.Debug
if cfg.FailOnChange {
@@ -434,8 +456,8 @@ func postProcessing(
}
}

// if fail on change has been enabled, check that no files were actually formatted, throwing an error if so
if cfg.FailOnChange && statz.Value(stats.Formatted) != 0 {
// if fail on change has been enabled, check that no files were actually changed, throwing an error if so
if cfg.FailOnChange && statz.Value(stats.Changed) != 0 {
return ErrFailOnChange
}

Loading

0 comments on commit 0e8ffff

Please sign in to comment.