Skip to content

Commit

Permalink
feat: simplify pipeline model
Browse files Browse the repository at this point in the history
For each path we determine the list of formatters that are interested in formatting it. From there, we sort
the list of formatters first by priority (lower value, higher priority) and then by name (lexicographically).

With this information we create a batch key which is based on the unique sequence of formatters. When enough paths with the same sequence is ready we apply them in order to each formatter.

By default, with no special configuration, this model guarantees that a given path will only be processed by one formatter at a time.

If a user wishes to influence the order in which formatters are applied they can use the priority field.

Signed-off-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee committed May 26, 2024
1 parent 6ce3d27 commit ce14ee8
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 229 deletions.
152 changes: 64 additions & 88 deletions cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ const (
)

var (
globalExcludes []glob.Glob
formatters map[string]*format.Formatter
pipelines map[string]*format.Pipeline
filesCh chan *walk.File
processedCh chan *walk.File
excludes []glob.Glob
formatters map[string]*format.Formatter

filesCh chan *walk.File
processedCh chan *walk.File

ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
)
Expand Down Expand Up @@ -73,45 +73,25 @@ func (f *Format) Run() (err error) {
}

// compile global exclude globs
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
return fmt.Errorf("failed to compile global excludes: %w", err)
}

// initialise pipelines
pipelines = make(map[string]*format.Pipeline)
// initialise formatters
formatters = make(map[string]*format.Formatter)

// iterate the formatters in lexicographical order
for _, name := range cfg.Names {
// init formatter
formatterCfg := cfg.Formatters[name]
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
for name, formatterCfg := range cfg.Formatters {
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes)

if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
log.Debugf("formatter not found: %v", name)
log.Debugf("formatter command not found: %v", name)
continue
} else if err != nil {
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
}

// store formatter by name
formatters[name] = formatter

// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
// 'p:<pipeline_name>' in which it is sorted by priority.
if formatterCfg.Pipeline == "" {
pipeline := format.Pipeline{}
pipeline.Add(formatter)
pipelines[name] = &pipeline
} else {
key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
pipeline, ok := pipelines[key]
if !ok {
pipeline = &format.Pipeline{}
pipelines[key] = pipeline
}
pipeline.Add(formatter)
}
}

// open the cache
Expand Down Expand Up @@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error {
func applyFormatters(ctx context.Context) func() error {
// create our own errgroup for concurrent formatting tasks
fg, ctx := errgroup.WithContext(ctx)
// simple optimization to avoid too many concurrent formatting tasks
// we can queue them up faster than the formatters can process them, this paces things a bit
fg.SetLimit(runtime.NumCPU())

// pre-initialise batches keyed by pipeline
batches := make(map[string][]*walk.File)
for key := range pipelines {
batches[key] = make([]*walk.File, 0, BatchSize)
}

// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
// been reached
tryApply := func(key string, file *walk.File) {
// append to batch
batches[key] = append(batches[key], file)
// track batches of formatting task based on their batch keys, which are determined by the unique sequence of
// formatters which should be applied to their respective files
batches := make(map[string][]*format.Task)

// check if the batch is full
apply := func(key string, flush bool) {
// lookup the batch and exit early if it's empty
batch := batches[key]
if len(batch) == BatchSize {
// get the pipeline
pipeline := pipelines[key]
if len(batch) == 0 {
return
}

// process the batch if it's full, or we've been asked to flush partial batches
if flush || len(batch) == BatchSize {

// copy the batch
files := make([]*walk.File, len(batch))
copy(files, batch)
// copy the batch as we re-use it for the next batch
tasks := make([]*format.Task, len(batch))
copy(tasks, batch)

// apply to the pipeline
// asynchronously apply the sequence formatters to the batch
fg.Go(func() error {
if err := pipeline.Apply(ctx, files); err != nil {
return err
// iterate the formatters, applying them in sequence to the batch of tasks
// we get the formatters list from the first task since they have all the same formatters list
for _, f := range tasks[0].Formatters {
if err := f.Apply(ctx, tasks); err != nil {
return err
}
}
for _, path := range files {
processedCh <- path

// pass each file to the processed channel
for _, task := range tasks {
processedCh <- task.File
}

return nil
})

Expand All @@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error {
}
}

// format any partial batches
flushBatches := func() {
for key, pipeline := range pipelines {

batch := batches[key]
pipeline := pipeline // capture for closure

if len(batch) > 0 {
fg.Go(func() error {
if err := pipeline.Apply(ctx, batch); err != nil {
return fmt.Errorf("%s failure: %w", key, err)
}
for _, path := range batch {
processedCh <- path
}
return nil
})
}
}
tryApply := func(task *format.Task) {
// append to batch
key := task.BatchKey
batches[key] = append(batches[key], task)
// try to apply
apply(key, false)
}

return func() error {
Expand All @@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error {
close(processedCh)
}()

// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
// iterate the files channel
for file := range filesCh {
var matches []string

for key, pipeline := range pipelines {
if !pipeline.Wants(file) {
continue
// determine a list of formatters that are interested in file
var matches []*format.Formatter
for _, formatter := range formatters {
if formatter.Wants(file) {
matches = append(matches, formatter)
}
matches = append(matches, key)
tryApply(key, file)
}
switch len(matches) {
case 0:
log.Debugf("no match found: %s", file.Path)

if len(matches) == 0 {
// no match, so we send it direct to the processed channel
log.Debugf("no match found: %s", file.Path)
processedCh <- file
case 1:
} else {
// record the match
stats.Add(stats.Matched, 1)
default:
return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches)
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full
task := format.NewTask(file, matches)
tryApply(&task)
}
}

// flush any partial batches which remain
flushBatches()
for key := range batches {
apply(key, true)
}

// wait for all outstanding formatting tasks to complete
if err := fg.Wait(); err != nil {
return fmt.Errorf("pipeline processing failure: %w", err)
return fmt.Errorf("formatting failure: %w", err)
}
return nil
}
Expand Down
39 changes: 3 additions & 36 deletions cli/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) {
assertStats(t, as, 31, 31, 2, 0)
}

func TestMatchingMultiplePipelines(t *testing.T) {
as := require.New(t)

tempDir := test.TempExamples(t)
configPath := tempDir + "/multiple.toml"

cfg := config2.Config{
Formatters: map[string]*config2.Formatter{
"echo": {
Command: "echo",
Includes: []string{"*"},
},
"touch": {
Command: "touch",
Includes: []string{"*"},
},
},
}

test.WriteConfig(t, configPath, cfg)
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.ErrorContains(err, "matched multiple formatters/pipelines")
as.ErrorContains(err, "echo")
as.ErrorContains(err, "touch")

// run with only one formatter
test.WriteConfig(t, configPath, cfg)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo")
as.NoError(err)
}

func TestCache(t *testing.T) {
as := require.New(t)

Expand Down Expand Up @@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) {

test.WriteConfig(t, configPath, config2.Config{
Formatters: map[string]*config2.Formatter{
// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
// priority 1
// a and b have no priority set, which means they default to 0 and should execute first
// a and b should execute in lexicographical order
// c should execute first since it has a priority of 1
"fmt-a": {
Command: "test-fmt",
Options: []string{"fmt-a"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-b": {
Command: "test-fmt",
Options: []string{"fmt-b"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-c": {
Command: "test-fmt",
Options: []string{"fmt-c"},
Includes: []string{"*.py"},
Pipeline: "foo",
Priority: 1,
},
},
Expand Down
9 changes: 0 additions & 9 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package config

import (
"fmt"
"sort"

"github.com/BurntSushi/toml"
)
Expand All @@ -13,7 +12,6 @@ type Config struct {
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
Excludes []string
}
Names []string `toml:"-"`
Formatters map[string]*Formatter `toml:"formatter"`
}

Expand All @@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) {
cfg.Formatters = filtered
}

// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
// ensures a deterministic order even when all priority values are the same e.g. 0
for name := range cfg.Formatters {
cfg.Names = append(cfg.Names, name)
}
sort.Strings(cfg.Names)

return
}
2 changes: 0 additions & 2 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(alejandra.Options)
as.Equal([]string{"*.nix"}, alejandra.Includes)
as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
as.Equal("nix", alejandra.Pipeline)
as.Equal(1, alejandra.Priority)

// deadnix
Expand All @@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(deadnix.Options)
as.Equal([]string{"*.nix"}, deadnix.Includes)
as.Nil(deadnix.Excludes)
as.Equal("nix", deadnix.Pipeline)
as.Equal(2, deadnix.Priority)

// ruby
Expand Down
4 changes: 1 addition & 3 deletions config/formatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ type Formatter struct {
Includes []string
// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
Excludes []string
// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
Pipeline string
// Indicates the order of precedence when executing as part of a pipeline.
// Indicates the order of precedence when executing this Formatter in a sequence of Formatters.
Priority int
}
26 changes: 17 additions & 9 deletions docs/configure.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ includes = ["*.go"]
command = "black"
includes = ["*.py"]

# use the priority field to control the order of execution

# run shellcheck first
[formatter.shellcheck]
command = "shellcheck"
includes = ["*.sh"]
pipeline = "sh"
priority = 0
priority = 0 # default is 0, but we set it here for clarity

# shfmt second
[formatter.shfmt]
command = "shfmt"
options = ["-s", "-w"]
includes = ["*.sh"]
pipeline = "sh"
priority = 1
```

Expand All @@ -49,13 +49,21 @@ priority = 1
- `options` - an optional list of args to be passed to `command`.
- `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path.
- `excludes` - an optional list of glob patterns used to exclude certain files from this formatter.
- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially
against a given path.
- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater
precedence is given to lower numbers, with the default being `0`.
- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`.

## Same file, multiple formatters?

For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is
then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically).

The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch
until it is full, at which point the files are passed to each formatter in turn.

This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**.
Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration.

> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to
> ensure deterministic behaviour over multiple executions.
By setting the priority fields appropriately, you can control the order in which those formatters are applied for any
files they _both happen to match on_.

## Supported Formatters

Expand Down
Loading

0 comments on commit ce14ee8

Please sign in to comment.