From fd5996948aba75fc78063609d556c41f58d393c1 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Thu, 17 Oct 2024 12:52:08 +0100 Subject: [PATCH] feat: configurable batch size Signed-off-by: Brian McGee --- cmd/format/format.go | 8 ++------ cmd/root_test.go | 35 +++++++++++++++++++++++++++++++++++ config/config.go | 22 ++++++++++++++++++++++ format/composite.go | 3 +-- format/formatter_test.go | 35 +++++++++++++++-------------------- 5 files changed, 75 insertions(+), 28 deletions(-) diff --git a/cmd/format/format.go b/cmd/format/format.go index 40dcc094..1012efd3 100644 --- a/cmd/format/format.go +++ b/cmd/format/format.go @@ -24,10 +24,6 @@ import ( bolt "go.etcd.io/bbolt" ) -const ( - BatchSize = 1024 -) - var ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled") func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) error { @@ -156,7 +152,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // create a composite formatter which will handle applying the correct formatters to each file we traverse - formatter, err := format.NewCompositeFormatter(cfg, statz, BatchSize) + formatter, err := format.NewCompositeFormatter(cfg, statz) if err != nil { return fmt.Errorf("failed to create composite formatter: %w", err) } @@ -175,7 +171,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // start traversing - files := make([]*walk.File, BatchSize) + files := make([]*walk.File, cfg.BatchSize) for { // read the next batch diff --git a/cmd/root_test.go b/cmd/root_test.go index f7de2f9f..281907c2 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -112,6 +112,41 @@ func TestCpuProfile(t *testing.T) { as.NoError(err) } +func TestBatchSize(t *testing.T) { + as := require.New(t) + tempDir := test.TempExamples(t) + + // capture current cwd, so we can replace it after the test is finished + cwd, err := os.Getwd() + as.NoError(err) + + t.Cleanup(func() { + // return to the previous working directory + as.NoError(os.Chdir(cwd)) + }) + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "10241") + as.ErrorIs(err, config.ErrInvalidBatchSize) + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "-10241") + as.ErrorContains(err, "invalid argument") + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter") + as.NoError(err) + + out, _, err := treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "1", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 1)) + + out, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "4", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 4)) + + out, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "128", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 128)) +} + func TestAllowMissingFormatter(t *testing.T) { as := require.New(t) diff --git a/config/config.go b/config/config.go index 906e9030..b0f479b7 100644 --- a/config/config.go +++ b/config/config.go @@ -6,14 +6,18 @@ import ( "path/filepath" "strings" + "github.com/charmbracelet/log" "github.com/numtide/treefmt/walk" "github.com/spf13/pflag" "github.com/spf13/viper" ) +var ErrInvalidBatchSize = fmt.Errorf("batch size must be between 1 and 10,240") + // Config is used to represent the list of configured Formatters. type Config struct { AllowMissingFormatter bool `mapstructure:"allow-missing-formatter" toml:"allow-missing-formatter,omitempty"` + BatchSize int `mapstructure:"batch-size" toml:"batch-size,omitempty"` CI bool `mapstructure:"ci" toml:"ci,omitempty"` ClearCache bool `mapstructure:"clear-cache" toml:"-"` // not allowed in config CPUProfile string `mapstructure:"cpu-profile" toml:"cpu-profile,omitempty"` @@ -59,6 +63,9 @@ func SetFlags(fs *pflag.FlagSet) { "allow-missing-formatter", false, "Do not exit with error if a configured formatter is missing. (env $TREEFMT_ALLOW_MISSING_FORMATTER)", ) + fs.Uint("batch-size", 1024, + "The maximum number of files to pass to a formatter at once. (env $TREEFMT_BATCH_SIZE)", + ) fs.Bool( "ci", false, "Runs treefmt in a CI mode, enabling --no-cache, --fail-on-change and adjusting some other settings "+ @@ -235,6 +242,21 @@ func FromViper(v *viper.Viper) (*Config, error) { } } + // validate batch size + // todo what is a reasonable upper limit on this? + + // default if it isn't set (e.g. in tests when using Config directly) + if cfg.BatchSize == 0 { + cfg.BatchSize = 1024 + } + + if !(1 <= cfg.BatchSize && cfg.BatchSize <= 10240) { + return nil, ErrInvalidBatchSize + } + + l := log.WithPrefix("config") + l.Infof("batch size = %d", cfg.BatchSize) + return cfg, nil } diff --git a/format/composite.go b/format/composite.go index a1313830..b418c8e6 100644 --- a/format/composite.go +++ b/format/composite.go @@ -344,7 +344,6 @@ func (c *CompositeFormatter) BustCache(db *bolt.DB) error { func NewCompositeFormatter( cfg *config.Config, statz *stats.Stats, - batchSize int, ) (*CompositeFormatter, error) { // compile global exclude globs globalExcludes, err := compileGlobs(cfg.Excludes) @@ -392,7 +391,7 @@ func NewCompositeFormatter( return &CompositeFormatter{ cfg: cfg, stats: statz, - batchSize: batchSize, + batchSize: cfg.BatchSize, globalExcludes: globalExcludes, log: log.WithPrefix("composite-formatter"), diff --git a/format/formatter_test.go b/format/formatter_test.go index d3b617d1..c71eac3a 100644 --- a/format/formatter_test.go +++ b/format/formatter_test.go @@ -17,15 +17,13 @@ import ( func TestInvalidFormatterName(t *testing.T) { as := require.New(t) - const batchSize = 1024 - cfg := &config.Config{} cfg.OnUnmatched = "info" statz := stats.New() // simple "empty" config - _, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err := format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // valid name using all the acceptable characters @@ -35,7 +33,7 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // test with some bad examples @@ -48,7 +46,7 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err = format.NewCompositeFormatter(cfg, &statz) as.ErrorIs(err, format.ErrInvalidName) } } @@ -56,10 +54,7 @@ func TestInvalidFormatterName(t *testing.T) { func TestFormatterHash(t *testing.T) { as := require.New(t) - const batchSize = 1024 - statz := stats.New() - tempDir := t.TempDir() // symlink some formatters into temp dir, so we can mess with their mod times @@ -93,7 +88,7 @@ func TestFormatterHash(t *testing.T) { }, } - f, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err := format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // hash for the first time @@ -123,7 +118,7 @@ func TestFormatterHash(t *testing.T) { }) t.Run("modify formatter options", func(_ *testing.T) { - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -133,7 +128,7 @@ func TestFormatterHash(t *testing.T) { python := cfg.FormatterConfigs["python"] python.Includes = []string{"*.py", "*.pyi"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -148,7 +143,7 @@ func TestFormatterHash(t *testing.T) { // adjust python excludes python.Excludes = []string{"*.pyi"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash() @@ -163,7 +158,7 @@ func TestFormatterHash(t *testing.T) { // adjust python options python.Options = []string{"-w", "-s"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h8, err := f.Hash() @@ -178,7 +173,7 @@ func TestFormatterHash(t *testing.T) { // adjust python priority python.Priority = 100 - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h10, err := f.Hash() @@ -198,7 +193,7 @@ func TestFormatterHash(t *testing.T) { Includes: []string{"*.go"}, } - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -208,7 +203,7 @@ func TestFormatterHash(t *testing.T) { // remove python formatter delete(cfg.FormatterConfigs, "python") - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -223,7 +218,7 @@ func TestFormatterHash(t *testing.T) { // remove elm formatter delete(cfg.FormatterConfigs, "elm") - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash() @@ -239,7 +234,7 @@ func TestFormatterHash(t *testing.T) { t.Run("modify global excludes", func(_ *testing.T) { cfg.Excludes = []string{"*.go"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -247,7 +242,7 @@ func TestFormatterHash(t *testing.T) { cfg.Excludes = []string{"*.go", "*.hs"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -263,7 +258,7 @@ func TestFormatterHash(t *testing.T) { cfg.Excludes = nil cfg.Global.Excludes = []string{"*.go", "*.hs"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash()