diff --git a/cli/format.go b/cli/format.go index 4b14220c..e1259f6e 100644 --- a/cli/format.go +++ b/cli/format.go @@ -30,11 +30,11 @@ const ( ) var ( - globalExcludes []glob.Glob - formatters map[string]*format.Formatter - pipelines map[string]*format.Pipeline - filesCh chan *walk.File - processedCh chan *walk.File + excludes []glob.Glob + formatters map[string]*format.Formatter + + filesCh chan *walk.File + processedCh chan *walk.File ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled") ) @@ -73,21 +73,18 @@ func (f *Format) Run() (err error) { } // compile global exclude globs - if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil { + if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil { return fmt.Errorf("failed to compile global excludes: %w", err) } - // initialise pipelines - pipelines = make(map[string]*format.Pipeline) + // initialise formatters formatters = make(map[string]*format.Formatter) - // iterate the formatters in lexicographical order - for _, name := range cfg.Names { - // init formatter - formatterCfg := cfg.Formatters[name] - formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes) + for name, formatterCfg := range cfg.Formatters { + formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes) + if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter { - log.Debugf("formatter not found: %v", name) + log.Debugf("formatter command not found: %v", name) continue } else if err != nil { return fmt.Errorf("%w: failed to initialise formatter: %v", err, name) @@ -95,23 +92,6 @@ func (f *Format) Run() (err error) { // store formatter by name formatters[name] = formatter - - // If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the - // formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by - // 'p:' in which it is sorted by priority. - if formatterCfg.Pipeline == "" { - pipeline := format.Pipeline{} - pipeline.Add(formatter) - pipelines[name] = &pipeline - } else { - key := fmt.Sprintf("p:%s", formatterCfg.Pipeline) - pipeline, ok := pipelines[key] - if !ok { - pipeline = &format.Pipeline{} - pipelines[key] = pipeline - } - pipeline.Add(formatter) - } } // open the cache @@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error { func applyFormatters(ctx context.Context) func() error { // create our own errgroup for concurrent formatting tasks fg, ctx := errgroup.WithContext(ctx) + // simple optimization to avoid too many concurrent formatting tasks + // we can queue them up faster than the formatters can process them, this paces things a bit + fg.SetLimit(runtime.NumCPU()) - // pre-initialise batches keyed by pipeline - batches := make(map[string][]*walk.File) - for key := range pipelines { - batches[key] = make([]*walk.File, 0, BatchSize) - } - - // for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has - // been reached - tryApply := func(key string, file *walk.File) { - // append to batch - batches[key] = append(batches[key], file) + // track batches of formatting task based on their batch keys, which are determined by the unique sequence of + // formatters which should be applied to their respective files + batches := make(map[string][]*format.Task) - // check if the batch is full + apply := func(key string, flush bool) { + // lookup the batch and exit early if it's empty batch := batches[key] - if len(batch) == BatchSize { - // get the pipeline - pipeline := pipelines[key] + if len(batch) == 0 { + return + } + + // process the batch if it's full, or we've been asked to flush partial batches + if flush || len(batch) == BatchSize { - // copy the batch - files := make([]*walk.File, len(batch)) - copy(files, batch) + // copy the batch as we re-use it for the next batch + tasks := make([]*format.Task, len(batch)) + copy(tasks, batch) - // apply to the pipeline + // asynchronously apply the sequence formatters to the batch fg.Go(func() error { - if err := pipeline.Apply(ctx, files); err != nil { - return err + // iterate the formatters, applying them in sequence to the batch of tasks + // we get the formatters list from the first task since they have all the same formatters list + for _, f := range tasks[0].Formatters { + if err := f.Apply(ctx, tasks); err != nil { + return err + } } - for _, path := range files { - processedCh <- path + + // pass each file to the processed channel + for _, task := range tasks { + processedCh <- task.File } + return nil }) @@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error { } } - // format any partial batches - flushBatches := func() { - for key, pipeline := range pipelines { - - batch := batches[key] - pipeline := pipeline // capture for closure - - if len(batch) > 0 { - fg.Go(func() error { - if err := pipeline.Apply(ctx, batch); err != nil { - return fmt.Errorf("%s failure: %w", key, err) - } - for _, path := range batch { - processedCh <- path - } - return nil - }) - } - } + tryApply := func(task *format.Task) { + // append to batch + key := task.BatchKey + batches[key] = append(batches[key], task) + // try to apply + apply(key, false) } return func() error { @@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error { close(processedCh) }() - // iterate the files channel, checking if any pipeline wants it, and attempting to apply if so. + // iterate the files channel for file := range filesCh { - var matches []string - for key, pipeline := range pipelines { - if !pipeline.Wants(file) { - continue + // determine a list of formatters that are interested in file + var matches []*format.Formatter + for _, formatter := range formatters { + if formatter.Wants(file) { + matches = append(matches, formatter) } - matches = append(matches, key) - tryApply(key, file) } - switch len(matches) { - case 0: - log.Debugf("no match found: %s", file.Path) + + if len(matches) == 0 { // no match, so we send it direct to the processed channel + log.Debugf("no match found: %s", file.Path) processedCh <- file - case 1: + } else { + // record the match stats.Add(stats.Matched, 1) - default: - return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches) + // create a new format task, add it to a batch based on its batch key and try to apply if the batch is full + task := format.NewTask(file, matches) + tryApply(&task) } } // flush any partial batches which remain - flushBatches() + for key := range batches { + apply(key, true) + } // wait for all outstanding formatting tasks to complete if err := fg.Wait(); err != nil { - return fmt.Errorf("pipeline processing failure: %w", err) + return fmt.Errorf("formatting failure: %w", err) } return nil } diff --git a/cli/format_test.go b/cli/format_test.go index e4c3b2a3..559accc6 100644 --- a/cli/format_test.go +++ b/cli/format_test.go @@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) { assertStats(t, as, 31, 31, 2, 0) } -func TestMatchingMultiplePipelines(t *testing.T) { - as := require.New(t) - - tempDir := test.TempExamples(t) - configPath := tempDir + "/multiple.toml" - - cfg := config2.Config{ - Formatters: map[string]*config2.Formatter{ - "echo": { - Command: "echo", - Includes: []string{"*"}, - }, - "touch": { - Command: "touch", - Includes: []string{"*"}, - }, - }, - } - - test.WriteConfig(t, configPath, cfg) - _, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) - as.ErrorContains(err, "matched multiple formatters/pipelines") - as.ErrorContains(err, "echo") - as.ErrorContains(err, "touch") - - // run with only one formatter - test.WriteConfig(t, configPath, cfg) - _, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo") - as.NoError(err) -} - func TestCache(t *testing.T) { as := require.New(t) @@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) { test.WriteConfig(t, configPath, config2.Config{ Formatters: map[string]*config2.Formatter{ - // a and b should execute in lexicographical order as they have default priority 0, with c last since it has - // priority 1 + // a and b have no priority set, which means they default to 0 and should execute first + // a and b should execute in lexicographical order + // c should execute first since it has a priority of 1 "fmt-a": { Command: "test-fmt", Options: []string{"fmt-a"}, Includes: []string{"*.py"}, - Pipeline: "foo", }, "fmt-b": { Command: "test-fmt", Options: []string{"fmt-b"}, Includes: []string{"*.py"}, - Pipeline: "foo", }, "fmt-c": { Command: "test-fmt", Options: []string{"fmt-c"}, Includes: []string{"*.py"}, - Pipeline: "foo", Priority: 1, }, }, diff --git a/config/config.go b/config/config.go index 2fb5f547..d9e281f7 100644 --- a/config/config.go +++ b/config/config.go @@ -2,7 +2,6 @@ package config import ( "fmt" - "sort" "github.com/BurntSushi/toml" ) @@ -13,7 +12,6 @@ type Config struct { // Excludes is an optional list of glob patterns used to exclude certain files from all formatters. Excludes []string } - Names []string `toml:"-"` Formatters map[string]*Formatter `toml:"formatter"` } @@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) { cfg.Formatters = filtered } - // sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This - // ensures a deterministic order even when all priority values are the same e.g. 0 - for name := range cfg.Formatters { - cfg.Names = append(cfg.Names, name) - } - sort.Strings(cfg.Names) - return } diff --git a/config/config_test.go b/config/config_test.go index 6ad74937..a99f6f81 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) { as.Nil(alejandra.Options) as.Equal([]string{"*.nix"}, alejandra.Includes) as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes) - as.Equal("nix", alejandra.Pipeline) as.Equal(1, alejandra.Priority) // deadnix @@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) { as.Nil(deadnix.Options) as.Equal([]string{"*.nix"}, deadnix.Includes) as.Nil(deadnix.Excludes) - as.Equal("nix", deadnix.Pipeline) as.Equal(2, deadnix.Priority) // ruby diff --git a/config/formatter.go b/config/formatter.go index 2c14c63f..9e77e6a7 100644 --- a/config/formatter.go +++ b/config/formatter.go @@ -9,8 +9,6 @@ type Formatter struct { Includes []string // Excludes is an optional list of glob patterns used to exclude certain files from this Formatter. Excludes []string - // Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key. - Pipeline string - // Indicates the order of precedence when executing as part of a pipeline. + // Indicates the order of precedence when executing this Formatter in a sequence of Formatters. Priority int } diff --git a/docs/configure.md b/docs/configure.md index d0601af7..2be0086b 100644 --- a/docs/configure.md +++ b/docs/configure.md @@ -23,19 +23,19 @@ includes = ["*.go"] command = "black" includes = ["*.py"] +# use the priority field to control the order of execution + # run shellcheck first [formatter.shellcheck] command = "shellcheck" includes = ["*.sh"] -pipeline = "sh" -priority = 0 +priority = 0 # default is 0, but we set it here for clarity # shfmt second [formatter.shfmt] command = "shfmt" options = ["-s", "-w"] includes = ["*.sh"] -pipeline = "sh" priority = 1 ``` @@ -49,13 +49,21 @@ priority = 1 - `options` - an optional list of args to be passed to `command`. - `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path. - `excludes` - an optional list of glob patterns used to exclude certain files from this formatter. -- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially - against a given path. -- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater - precedence is given to lower numbers, with the default being `0`. +- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`. + +## Same file, multiple formatters? + +For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is +then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically). + +The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch +until it is full, at which point the files are passed to each formatter in turn. + +This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**. +Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration. -> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to -> ensure deterministic behaviour over multiple executions. +By setting the priority fields appropriately, you can control the order in which those formatters are applied for any +files they _both happen to match on_. ## Supported Formatters diff --git a/format/formatter.go b/format/formatter.go index dbe2f114..1db9164c 100644 --- a/format/formatter.go +++ b/format/formatter.go @@ -40,43 +40,28 @@ func (f *Formatter) Executable() string { return f.executable } -func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) error { +func (f *Formatter) Name() string { + return f.name +} + +func (f *Formatter) Priority() int { + return f.config.Priority +} + +func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error { start := time.Now() // construct args, starting with config args := f.config.Options - // If filter is true it indicates we are executing as part of a pipeline. - // In such a scenario each formatter must sub filter the paths provided as different formatters might want different - // files in a pipeline. - if filter { - // reset the batch - f.batch = f.batch[:0] - - // filter paths - for _, file := range files { - if f.Wants(file) { - f.batch = append(f.batch, file.RelPath) - } - } - - // exit early if nothing to process - if len(f.batch) == 0 { - return nil - } - - // append paths to the args - args = append(args, f.batch...) - } else { - // exit early if nothing to process - if len(files) == 0 { - return nil - } + // exit early if nothing to process + if len(tasks) == 0 { + return nil + } - // append paths to the args - for _, file := range files { - args = append(args, file.RelPath) - } + // append paths to the args + for _, task := range tasks { + args = append(args, task.File.RelPath) } // execute the command @@ -95,7 +80,7 @@ func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) // - f.log.Infof("%v files processed in %v", len(files), time.Now().Sub(start)) + f.log.Infof("%v files processed in %v", len(tasks), time.Now().Sub(start)) return nil } @@ -136,10 +121,10 @@ func NewFormatter( f.executable = executable // initialise internal state - if cfg.Pipeline == "" { - f.log = log.WithPrefix(fmt.Sprintf("format | %s", name)) + if cfg.Priority > 0 { + f.log = log.WithPrefix(fmt.Sprintf("format | %s[%d]", name, cfg.Priority)) } else { - f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", cfg.Pipeline, name)) + f.log = log.WithPrefix(fmt.Sprintf("format | %s", name)) } f.includes, err = CompileGlobs(cfg.Includes) diff --git a/format/pipeline.go b/format/pipeline.go deleted file mode 100644 index af8b1c32..00000000 --- a/format/pipeline.go +++ /dev/null @@ -1,40 +0,0 @@ -package format - -import ( - "context" - "slices" - - "git.numtide.com/numtide/treefmt/walk" -) - -type Pipeline struct { - sequence []*Formatter -} - -func (p *Pipeline) Add(f *Formatter) { - p.sequence = append(p.sequence, f) - // sort by priority in ascending order - slices.SortFunc(p.sequence, func(a, b *Formatter) int { - return a.config.Priority - b.config.Priority - }) -} - -func (p *Pipeline) Wants(path *walk.File) bool { - var match bool - for _, f := range p.sequence { - match = f.Wants(path) - if match { - break - } - } - return match -} - -func (p *Pipeline) Apply(ctx context.Context, paths []*walk.File) error { - for _, f := range p.sequence { - if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil { - return err - } - } - return nil -} diff --git a/format/task.go b/format/task.go new file mode 100644 index 00000000..062b4544 --- /dev/null +++ b/format/task.go @@ -0,0 +1,42 @@ +package format + +import ( + "cmp" + "slices" + + "git.numtide.com/numtide/treefmt/walk" +) + +type Task struct { + File *walk.File + Formatters []*Formatter + BatchKey string +} + +func NewTask(file *walk.File, formatters []*Formatter) Task { + // sort by priority in ascending order + slices.SortFunc(formatters, func(a, b *Formatter) int { + priorityA := a.Priority() + priorityB := b.Priority() + + result := priorityA - priorityB + if result == 0 { + // formatters with the same priority are sorted lexicographically to ensure a deterministic outcome + result = cmp.Compare(a.Name(), b.Name()) + } + return result + }) + + // construct a batch key which represents the unique sequence of formatters to be applied to file + var key string + for _, f := range formatters { + key += f.name + ":" + } + key = key[:len(key)-1] + + return Task{ + File: file, + Formatters: formatters, + BatchKey: key, + } +} diff --git a/nix/treefmt.nix b/nix/treefmt.nix index 4995e6ca..cc50122f 100644 --- a/nix/treefmt.nix +++ b/nix/treefmt.nix @@ -24,17 +24,14 @@ settings.formatter = { deadnix = { - pipeline = "nix"; priority = 1; }; statix = { - pipeline = "nix"; priority = 2; }; alejandra = { - pipeline = "nix"; priority = 3; }; diff --git a/test/examples/nixpkgs.toml b/test/examples/nixpkgs.toml index 41401cdc..e862f612 100644 --- a/test/examples/nixpkgs.toml +++ b/test/examples/nixpkgs.toml @@ -4,11 +4,9 @@ command = "deadnix" options = ["--edit"] includes = ["*.nix"] -pipeline = "nix" priority = 1 [formatter.nixpkgs-fmt] command = "nixpkgs-fmt" includes = ["*.nix"] -pipeline = "nix" priority = 2 \ No newline at end of file diff --git a/test/examples/treefmt.toml b/test/examples/treefmt.toml index 8d3ea62d..8acc9eb8 100644 --- a/test/examples/treefmt.toml +++ b/test/examples/treefmt.toml @@ -31,13 +31,11 @@ command = "alejandra" includes = ["*.nix"] # Act as an example on how to exclude specific files excludes = ["examples/nix/sources.nix"] -pipeline = "nix" priority = 1 [formatter.deadnix] command = "deadnix" includes = ["*.nix"] -pipeline = "nix" priority = 2 [formatter.ruby]