diff --git a/cmd/format/format.go b/cmd/format/format.go index 40dcc094..829c91a6 100644 --- a/cmd/format/format.go +++ b/cmd/format/format.go @@ -77,9 +77,6 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) }() } - // set a prefix on the default logger - log.SetPrefix("format") - var db *bolt.DB // open the db unless --no-cache was specified @@ -161,13 +158,6 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) return fmt.Errorf("failed to create composite formatter: %w", err) } - if db != nil { - // compare formatters with the db, busting the cache if the formatters have changed - if err := formatter.BustCache(db); err != nil { - return fmt.Errorf("failed to compare formatters: %w", err) - } - } - // create a new walker for traversing the paths walker, err := walk.NewCompositeReader(walkType, cfg.TreeRoot, paths, db, statz) if err != nil { diff --git a/cmd/root_test.go b/cmd/root_test.go index 1425dd32..1ad40468 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -56,7 +56,7 @@ func TestOnUnmatched(t *testing.T) { checkOutput := func(level string, output []byte) { for _, p := range paths { - as.Contains(string(output), fmt.Sprintf("%s format: no formatter for path: %s", level, p)) + as.Contains(string(output), fmt.Sprintf("%s no formatter for path: %s", level, p)) } } @@ -605,7 +605,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { configPath := tempDir + "/touch.toml" // symlink some formatters into temp dir, so we can mess with their mod times - binPath := tempDir + "/bin" + binPath := filepath.Join(tempDir, "bin") as.NoError(os.Mkdir(binPath, 0o755)) binaries := []string{"black", "elm-format", "gofmt"} @@ -613,7 +613,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { for _, name := range binaries { src, err := exec.LookPath(name) as.NoError(err) - as.NoError(os.Symlink(src, binPath+"/"+name)) + as.NoError(os.Symlink(src, filepath.Join(binPath, name))) } // prepend our test bin directory to PATH @@ -647,7 +647,8 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // tweak mod time of elm formatter - as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format")) + newTime := time.Now().Add(-time.Minute) + as.NoError(test.Lutimes(t, filepath.Join(binPath, "elm-format"), newTime, newTime)) _, statz, err = treefmt(t, args...) as.NoError(err) @@ -655,7 +656,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, statz, map[stats.Type]int{ stats.Traversed: 32, stats.Matched: 3, - stats.Formatted: 3, + stats.Formatted: 1, stats.Changed: 0, }) @@ -671,7 +672,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // tweak mod time of python formatter - as.NoError(test.RecreateSymlink(t, binPath+"/"+"black")) + as.NoError(test.Lutimes(t, filepath.Join(binPath, "black"), newTime, newTime)) _, statz, err = treefmt(t, args...) as.NoError(err) @@ -679,7 +680,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, statz, map[stats.Type]int{ stats.Traversed: 32, stats.Matched: 3, - stats.Formatted: 3, + stats.Formatted: 2, stats.Changed: 0, }) @@ -695,11 +696,12 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // add go formatter - cfg.FormatterConfigs["go"] = &config.Formatter{ + goFormatter := &config.Formatter{ Command: "gofmt", Options: []string{"-w"}, Includes: []string{"*.go"}, } + cfg.FormatterConfigs["go"] = goFormatter test.WriteConfig(t, configPath, cfg) _, statz, err = treefmt(t, args...) @@ -708,7 +710,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, statz, map[stats.Type]int{ stats.Traversed: 32, stats.Matched: 4, - stats.Formatted: 4, + stats.Formatted: 1, stats.Changed: 0, }) @@ -723,6 +725,35 @@ func TestBustCacheOnFormatterChange(t *testing.T) { stats.Changed: 0, }) + // tweak go formatter options + goFormatter.Options = []string{"-w", "-s"} + + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 4, + stats.Formatted: 1, + stats.Changed: 0, + }) + + // add a priority + cfg.FormatterConfigs["go"].Priority = 3 + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 4, + stats.Formatted: 1, + stats.Changed: 0, + }) + // remove python formatter delete(cfg.FormatterConfigs, "python") test.WriteConfig(t, configPath, cfg) @@ -733,7 +764,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, statz, map[stats.Type]int{ stats.Traversed: 32, stats.Matched: 2, - stats.Formatted: 2, + stats.Formatted: 0, stats.Changed: 0, }) @@ -758,7 +789,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { assertStats(t, as, statz, map[stats.Type]int{ stats.Traversed: 32, stats.Matched: 1, - stats.Formatted: 1, + stats.Formatted: 0, stats.Changed: 0, }) diff --git a/format/composite.go b/format/composite.go new file mode 100644 index 00000000..8b68a393 --- /dev/null +++ b/format/composite.go @@ -0,0 +1,193 @@ +package format + +import ( + "context" + "crypto/sha256" + "errors" + "fmt" + "os" + "slices" + + "github.com/charmbracelet/log" + "github.com/gobwas/glob" + "github.com/numtide/treefmt/config" + "github.com/numtide/treefmt/stats" + "github.com/numtide/treefmt/walk" + "mvdan.cc/sh/v3/expand" +) + +const ( + batchKeySeparator = ":" +) + +var ErrFormattingFailures = errors.New("formatting failures detected") + +// CompositeFormatter handles the application of multiple Formatter instances based on global excludes and individual +// formatter configuration. +type CompositeFormatter struct { + cfg *config.Config + stats *stats.Stats + globalExcludes []glob.Glob + + unmatchedLevel log.Level + + scheduler *scheduler + formatters map[string]*Formatter +} + +// match filters the file against global excludes and returns a list of formatters that want to process the file. +func (c *CompositeFormatter) match(file *walk.File) []*Formatter { + // first check if this file has been globally excluded + if pathMatches(file.RelPath, c.globalExcludes) { + log.Debugf("path matched global excludes: %s", file.RelPath) + + return nil + } + + // a list of formatters that match this file + var matches []*Formatter + + // iterate the formatters, recording which are interested in this file + for _, formatter := range c.formatters { + if formatter.Wants(file) { + matches = append(matches, formatter) + } + } + + return matches +} + +// Apply applies the configured formatters to the given files. +func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) error { + var toRelease []*walk.File + + for _, file := range files { + matches := c.match(file) // match the file against the formatters + + // check if there were no matches + if len(matches) == 0 { + // log that there was no match, exiting with an error if the unmatched level was set to fatal + if c.unmatchedLevel == log.FatalLevel { + return fmt.Errorf("no formatter for path: %s", file.RelPath) + } + + log.Logf(c.unmatchedLevel, "no formatter for path: %s", file.RelPath) + + // no further processing to be done, append to the release list + toRelease = append(toRelease, file) + + // continue to the next file + continue + } + + // record there was a match + c.stats.Add(stats.Matched, 1) + + if accepted, err := c.scheduler.submit(ctx, file, matches); err != nil { + return fmt.Errorf("failed to schedule file: %w", err) + } else if !accepted { + // if a file wasn't accepted, it means there was no formatting to perform + toRelease = append(toRelease, file) + } + } + + // release files that require no further processing + // we set noCache to true as there's no need to update the cache, since we skipped those files + releaseCtx := walk.SetNoCache(ctx, true) + + for _, file := range toRelease { + if err := file.Release(releaseCtx); err != nil { + return fmt.Errorf("failed to release file: %w", err) + } + } + + return nil +} + +// signature takes anything that might affect the paths to be traversed, +// or how they are traversed, and adds it to a sha256 hash. +// This can be used to determine if there has been a material change in config. +func (c *CompositeFormatter) signature() (signature, error) { + h := sha256.New() + + // sort formatters deterministically + formatters := make([]*Formatter, 0, len(c.formatters)) + for _, f := range c.formatters { + formatters = append(formatters, f) + } + + slices.SortFunc(formatters, formatterSortFunc) + + // apply them to the hash + for _, f := range formatters { + if err := f.Hash(h); err != nil { + return nil, fmt.Errorf("failed to hash formatter: %w", err) + } + } + + // finalize + return h.Sum(nil), nil +} + +// Close finalizes the processing of the CompositeFormatter, ensuring that any remaining batches are applied and +// all formatters have completed their tasks. It returns an error if any formatting failures were detected. +func (c *CompositeFormatter) Close(ctx context.Context) error { + return c.scheduler.close(ctx) +} + +func NewCompositeFormatter( + cfg *config.Config, + statz *stats.Stats, + batchSize int, +) (*CompositeFormatter, error) { + // compile global exclude globs + globalExcludes, err := compileGlobs(cfg.Excludes) + if err != nil { + return nil, fmt.Errorf("failed to compile global excludes: %w", err) + } + + // parse unmatched log level + unmatchedLevel, err := log.ParseLevel(cfg.OnUnmatched) + if err != nil { + return nil, fmt.Errorf("invalid on-unmatched value: %w", err) + } + + // create a composite formatter, adjusting the change logging based on --fail-on-change + changeLevel := log.DebugLevel + if cfg.FailOnChange { + changeLevel = log.ErrorLevel + } + + // create formatters + formatters := make(map[string]*Formatter) + + env := expand.ListEnviron(os.Environ()...) + + for name, formatterCfg := range cfg.FormatterConfigs { + formatter, err := newFormatter(name, cfg.TreeRoot, env, formatterCfg) + + if errors.Is(err, ErrCommandNotFound) && cfg.AllowMissingFormatter { + log.Debugf("formatter command not found: %v", name) + + continue + } else if err != nil { + return nil, fmt.Errorf("failed to initialise formatter %v: %w", name, err) + } + + // store formatter by name + formatters[name] = formatter + } + + // create a scheduler for carrying out the actual formatting + scheduler := newScheduler(statz, batchSize, changeLevel, formatters) + + return &CompositeFormatter{ + cfg: cfg, + stats: statz, + globalExcludes: globalExcludes, + unmatchedLevel: unmatchedLevel, + + scheduler: scheduler, + formatters: formatters, + }, nil +} diff --git a/format/format.go b/format/format.go deleted file mode 100644 index 86528122..00000000 --- a/format/format.go +++ /dev/null @@ -1,407 +0,0 @@ -package format - -import ( - "cmp" - "context" - "errors" - "fmt" - "os" - "runtime" - "slices" - "strings" - "sync/atomic" - "time" - - "github.com/charmbracelet/log" - "github.com/gobwas/glob" - "github.com/numtide/treefmt/config" - "github.com/numtide/treefmt/stats" - "github.com/numtide/treefmt/walk" - "github.com/numtide/treefmt/walk/cache" - bolt "go.etcd.io/bbolt" - "golang.org/x/sync/errgroup" - "mvdan.cc/sh/v3/expand" -) - -const ( - batchKeySeparator = ":" -) - -var ErrFormattingFailures = errors.New("formatting failures detected") - -// batchKey represents the unique sequence of formatters to be applied to a batch of files. -// For example, "deadnix:statix:nixpkgs-fmt" indicates that deadnix should be applied first, statix second and -// nixpkgs-fmt third. -// Files are batched based on their formatting sequence, as determined by the priority and includes/excludes in the -// formatter configuration. -type batchKey string - -// sequence returns the list of formatters, by name, to be applied to a batch of files. -func (b batchKey) sequence() []string { - return strings.Split(string(b), batchKeySeparator) -} - -func newBatchKey(formatters []*Formatter) batchKey { - components := make([]string, 0, len(formatters)) - for _, f := range formatters { - components = append(components, f.Name()) - } - - return batchKey(strings.Join(components, batchKeySeparator)) -} - -// batchMap maintains a mapping between batchKey and a slice of pointers to walk.File, used to organize files into -// batches based on the sequence of formatters to be applied. -type batchMap map[batchKey][]*walk.File - -func formatterSortFunc(a, b *Formatter) int { - // sort by priority in ascending order - priorityA := a.Priority() - priorityB := b.Priority() - - result := priorityA - priorityB - if result == 0 { - // formatters with the same priority are sorted lexicographically to ensure a deterministic outcome - result = cmp.Compare(a.Name(), b.Name()) - } - - return result -} - -// Append adds a file to the batch corresponding to the given sequence of formatters and returns the updated batch. -func (b batchMap) Append(file *walk.File, matches []*Formatter) (key batchKey, batch []*walk.File) { - slices.SortFunc(matches, formatterSortFunc) - - // construct a batch key based on the sequence of formatters - key = newBatchKey(matches) - - // append to the batch - b[key] = append(b[key], file) - - // return the batch - return key, b[key] -} - -// CompositeFormatter handles the application of multiple Formatter instances based on global excludes and individual -// formatter configuration. -type CompositeFormatter struct { - stats *stats.Stats - batchSize int - globalExcludes []glob.Glob - - changeLevel log.Level - unmatchedLevel log.Level - - formatters map[string]*Formatter - - eg *errgroup.Group - batches batchMap - - // formatError indicates if at least one formatting error occurred - formatError *atomic.Bool -} - -func (c *CompositeFormatter) apply(ctx context.Context, key batchKey, batch []*walk.File) { - c.eg.Go(func() error { - var formatErrors []error - - // apply the formatters in sequence - for _, name := range key.sequence() { - formatter := c.formatters[name] - - if err := formatter.Apply(ctx, batch); err != nil { - formatErrors = append(formatErrors, err) - } - } - - // record if a format error occurred - hasErrors := len(formatErrors) > 0 - c.formatError.Store(hasErrors) - - if !hasErrors { - // record that the file was formatted - c.stats.Add(stats.Formatted, len(batch)) - } - - // Create a release context. - // We set no-cache based on whether any formatting errors occurred in this batch. - // This is to communicate with any caching layer, if used when reading files for this batch, that it should not - // update the state of any file in this batch, as we want to re-process them in later invocations. - releaseCtx := walk.SetNoCache(ctx, hasErrors) - - // post-processing - for _, file := range batch { - // check if the file has changed - changed, newInfo, err := file.Stat() - if err != nil { - return err - } - - if changed { - // record that a change in the underlying file occurred - c.stats.Add(stats.Changed, 1) - - log.Log( - c.changeLevel, "file has changed", - "path", file.RelPath, - "prev_size", file.Info.Size(), - "prev_mod_time", file.Info.ModTime().Truncate(time.Second), - "current_size", newInfo.Size(), - "current_mod_time", newInfo.ModTime().Truncate(time.Second), - ) - - // update the file info - file.Info = newInfo - } - - // release the file as there is no further processing to be done on it - if err := file.Release(releaseCtx); err != nil { - return fmt.Errorf("failed to release file: %w", err) - } - } - - return nil - }) -} - -// match filters the file against global excludes and returns a list of formatters that want to process the file. -func (c *CompositeFormatter) match(file *walk.File) []*Formatter { - // first check if this file has been globally excluded - if pathMatches(file.RelPath, c.globalExcludes) { - log.Debugf("path matched global excludes: %s", file.RelPath) - - return nil - } - - // a list of formatters that match this file - var matches []*Formatter - - // otherwise, check if any formatters are interested in it - for _, formatter := range c.formatters { - if formatter.Wants(file) { - matches = append(matches, formatter) - } - } - - return matches -} - -// Apply applies the configured formatters to the given files. -func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) error { - var toRelease []*walk.File - - for _, file := range files { - matches := c.match(file) // match the file against the formatters - - // check if there were no matches - if len(matches) == 0 { - // log that there was no match, exiting with an error if the unmatched level was set to fatal - if c.unmatchedLevel == log.FatalLevel { - return fmt.Errorf("no formatter for path: %s", file.RelPath) - } - - log.Logf(c.unmatchedLevel, "no formatter for path: %s", file.RelPath) - - // no further processing to be done, append to the release list - toRelease = append(toRelease, file) - - // continue to the next file - continue - } - - // record there was a match - c.stats.Add(stats.Matched, 1) - - // check if the file is new or has changed when compared to the cache entry - if file.Cache == nil || file.Cache.HasChanged(file.Info) { - // add this file to a batch and if it's full, apply formatters to the batch - if key, batch := c.batches.Append(file, matches); len(batch) == c.batchSize { - c.apply(ctx, newBatchKey(matches), batch) - // reset the batch - c.batches[key] = make([]*walk.File, 0, c.batchSize) - } - } else { - // no further processing to be done, append to the release list - toRelease = append(toRelease, file) - } - } - - // release files that require no further processing - // we set noCache to true as there's no need to update the cache, since we skipped those files - releaseCtx := walk.SetNoCache(ctx, true) - - for _, file := range toRelease { - if err := file.Release(releaseCtx); err != nil { - return fmt.Errorf("failed to release file: %w", err) - } - } - - return nil -} - -// BustCache compares the currently configured formatters with their respective entries in the db. -// If a formatter was added, removed or modified, we clear any path entries from the cache, ensuring that all paths -// get formatted with the most recent formatter set. -func (c *CompositeFormatter) BustCache(db *bolt.DB) error { - return db.Update(func(tx *bolt.Tx) error { - clearPaths := false - - pathsBucket, err := cache.BucketPaths(tx) - if err != nil { - return fmt.Errorf("failed to get paths bucket from cache: %w", err) - } - - formattersBucket, err := cache.BucketFormatters(tx) - if err != nil { - return fmt.Errorf("failed to get formatters bucket from cache: %w", err) - } - - // check for any newly configured or modified formatters - for name, formatter := range c.formatters { - stat, err := os.Lstat(formatter.Executable()) - if err != nil { - return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err) - } - - entry, err := formattersBucket.Get(name) - if !(err == nil || errors.Is(err, cache.ErrKeyNotFound)) { - return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err) - } - - isNew := errors.Is(err, cache.ErrKeyNotFound) - hasChanged := !(isNew || (entry.Size == stat.Size() && entry.Modified == stat.ModTime())) - - if isNew { - log.Debugf("formatter '%s' is new", name) - } else if hasChanged { - log.Debug("formatter '%s' has changed", - name, - "size", stat.Size(), - "modTime", stat.ModTime(), - "cachedSize", entry.Size, - "cachedModTime", entry.Modified, - ) - } - - // update overall flag - clearPaths = clearPaths || isNew || hasChanged - - // record formatters info - entry = &cache.Entry{ - Size: stat.Size(), - Modified: stat.ModTime(), - } - - if err = formattersBucket.Put(name, entry); err != nil { - return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err) - } - } - - // check for any removed formatters - if err = formattersBucket.ForEach(func(key string, _ *cache.Entry) error { - _, ok := c.formatters[key] - if !ok { - // remove the formatter entry from the cache - if err = formattersBucket.Delete(key); err != nil { - return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err) - } - // indicate a clean is required - clearPaths = true - } - - return nil - }); err != nil { - return fmt.Errorf("failed to check cache for removed formatters: %w", err) - } - - if clearPaths { - // remove all path entries - if err := pathsBucket.DeleteAll(); err != nil { - return fmt.Errorf("failed to remove all path entries from cache: %w", err) - } - } - - return nil - }) -} - -// Close finalizes the processing of the CompositeFormatter, ensuring that any remaining batches are applied and -// all formatters have completed their tasks. It returns an error if any formatting failures were detected. -func (c *CompositeFormatter) Close(ctx context.Context) error { - // flush any partial batches that remain - for key, batch := range c.batches { - if len(batch) > 0 { - c.apply(ctx, key, batch) - } - } - - // wait for processing to complete - if err := c.eg.Wait(); err != nil { - return fmt.Errorf("failed to wait for formatters: %w", err) - } else if c.formatError.Load() { - return ErrFormattingFailures - } - - return nil -} - -func NewCompositeFormatter( - cfg *config.Config, - statz *stats.Stats, - batchSize int, -) (*CompositeFormatter, error) { - // compile global exclude globs - globalExcludes, err := compileGlobs(cfg.Excludes) - if err != nil { - return nil, fmt.Errorf("failed to compile global excludes: %w", err) - } - - // parse unmatched log level - unmatchedLevel, err := log.ParseLevel(cfg.OnUnmatched) - if err != nil { - return nil, fmt.Errorf("invalid on-unmatched value: %w", err) - } - - // create a composite formatter, adjusting the change logging based on --fail-on-change - changeLevel := log.DebugLevel - if cfg.FailOnChange { - changeLevel = log.ErrorLevel - } - - // create formatters - formatters := make(map[string]*Formatter) - - env := expand.ListEnviron(os.Environ()...) - - for name, formatterCfg := range cfg.FormatterConfigs { - formatter, err := newFormatter(name, cfg.TreeRoot, env, formatterCfg) - - if errors.Is(err, ErrCommandNotFound) && cfg.AllowMissingFormatter { - log.Debugf("formatter command not found: %v", name) - - continue - } else if err != nil { - return nil, fmt.Errorf("failed to initialise formatter %v: %w", name, err) - } - - // store formatter by name - formatters[name] = formatter - } - - // create an errgroup for asynchronously formatting - eg := errgroup.Group{} - // we use a simple heuristic to avoid too much contention by limiting the concurrency to runtime.NumCPU() - eg.SetLimit(runtime.NumCPU()) - - return &CompositeFormatter{ - stats: statz, - batchSize: batchSize, - globalExcludes: globalExcludes, - changeLevel: changeLevel, - unmatchedLevel: unmatchedLevel, - formatters: formatters, - eg: &eg, - batches: make(batchMap), - formatError: new(atomic.Bool), - }, nil -} diff --git a/format/formatter.go b/format/formatter.go index 5254565d..fcf1aaa3 100644 --- a/format/formatter.go +++ b/format/formatter.go @@ -4,9 +4,11 @@ import ( "context" "errors" "fmt" + "hash" "os" "os/exec" "regexp" + "strings" "time" "github.com/charmbracelet/log" @@ -52,6 +54,29 @@ func (f *Formatter) Executable() string { return f.executable } +// Hash adds this formatter's config and executable info to the config hash being created. +func (f *Formatter) Hash(h hash.Hash) error { + // including the name helps us to easily detect when formatter's have been added/removed + h.Write([]byte(f.name)) + // if options change, the outcome of applying the formatter might be different + h.Write([]byte(strings.Join(f.config.Options, " "))) + // if priority changes, the outcome of applying a sequence of formatters might be different + h.Write([]byte(fmt.Sprintf("%d", f.config.Priority))) + + // stat the formatter's executable + info, err := os.Lstat(f.executable) + if err != nil { + return fmt.Errorf("failed to stat formatter executable: %w", err) + } + + // include the executable's size and mod time + // if the formatter executable changes (e.g. new version) the outcome of applying the formatter might differ + h.Write([]byte(fmt.Sprintf("%d", info.Size()))) + h.Write([]byte(fmt.Sprintf("%d", info.ModTime().Unix()))) + + return nil +} + func (f *Formatter) Apply(ctx context.Context, files []*walk.File) error { start := time.Now() @@ -137,9 +162,9 @@ func newFormatter( // initialise internal state if cfg.Priority > 0 { - f.log = log.WithPrefix(fmt.Sprintf("format | %s[%d]", name, cfg.Priority)) + f.log = log.WithPrefix(fmt.Sprintf("formatter | %s[%d]", name, cfg.Priority)) } else { - f.log = log.WithPrefix(fmt.Sprintf("format | %s", name)) + f.log = log.WithPrefix(fmt.Sprintf("formatter | %s", name)) } f.includes, err = compileGlobs(cfg.Includes) diff --git a/format/formatter_test.go b/format/formatter_test.go index 795ed4f2..cff5f3b5 100644 --- a/format/formatter_test.go +++ b/format/formatter_test.go @@ -1,11 +1,15 @@ -package format_test +package format //nolint:testpackage import ( + "os" + "os/exec" + "path/filepath" "testing" + "time" "github.com/numtide/treefmt/config" - "github.com/numtide/treefmt/format" "github.com/numtide/treefmt/stats" + "github.com/numtide/treefmt/test" "github.com/stretchr/testify/require" ) @@ -20,7 +24,7 @@ func TestInvalidFormatterName(t *testing.T) { statz := stats.New() // simple "empty" config - _, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err := NewCompositeFormatter(cfg, &statz, batchSize) as.NoError(err) // valid name using all the acceptable characters @@ -30,7 +34,7 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err = NewCompositeFormatter(cfg, &statz, batchSize) as.NoError(err) // test with some bad examples @@ -43,7 +47,137 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) - as.ErrorIs(err, format.ErrInvalidName) + _, err = NewCompositeFormatter(cfg, &statz, batchSize) + as.ErrorIs(err, ErrInvalidName) } } + +func TestFormatSignature(t *testing.T) { + as := require.New(t) + + const batchSize = 1024 + + statz := stats.New() + + tempDir := t.TempDir() + + // symlink some formatters into temp dir, so we can mess with their mod times + binPath := filepath.Join(tempDir, "bin") + as.NoError(os.Mkdir(binPath, 0o755)) + + binaries := []string{"black", "elm-format", "gofmt"} + + for _, name := range binaries { + src, err := exec.LookPath(name) + as.NoError(err) + as.NoError(os.Symlink(src, filepath.Join(binPath, name))) + } + + // prepend our test bin directory to PATH + t.Setenv("PATH", binPath+":"+os.Getenv("PATH")) + + // start with 2 formatters + cfg := &config.Config{ + OnUnmatched: "info", + FormatterConfigs: map[string]*config.Formatter{ + "python": { + Command: "black", + Includes: []string{"*.py"}, + }, + "elm": { + Command: "elm-format", + Options: []string{"--yes"}, + Includes: []string{"*.elm"}, + }, + }, + } + + oldSignature := assertSignatureChangedAndStable(t, as, cfg, nil) + + t.Run("change formatter mod time", func(t *testing.T) { + for _, name := range []string{"black", "elm-format"} { + t.Logf("changing mod time of %s", name) + + // tweak mod time + newTime := time.Now().Add(-time.Minute) + as.NoError(test.Lutimes(t, filepath.Join(binPath, name), newTime, newTime)) + + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + } + }) + + t.Run("modify formatter options", func(_ *testing.T) { + f, err := NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + oldSignature = assertSignatureChangedAndStable(t, as, cfg, nil) + + // adjust python includes + python := cfg.FormatterConfigs["python"] + python.Includes = []string{"*.py", "*.pyi"} + + newHash, err := f.signature() + as.NoError(err) + as.Equal(oldSignature, newHash, "hash should not have changed") + + // adjust python excludes + python.Excludes = []string{"*.pyi"} + + newHash, err = f.signature() + as.NoError(err) + as.Equal(oldSignature, newHash, "hash should not have changed") + + // adjust python options + python.Options = []string{"-w", "-s"} + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + + // adjust python priority + python.Priority = 100 + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + + // adjust command + python.Command = "deadnix" + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + }) + + t.Run("add/remove formatters", func(_ *testing.T) { + cfg.FormatterConfigs["go"] = &config.Formatter{ + Command: "gofmt", + Options: []string{"-w"}, + Includes: []string{"*.go"}, + } + + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + + // remove python formatter + delete(cfg.FormatterConfigs, "python") + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + + // remove elm formatter + delete(cfg.FormatterConfigs, "elm") + oldSignature = assertSignatureChangedAndStable(t, as, cfg, oldSignature) + }) +} + +func assertSignatureChangedAndStable( + t *testing.T, + as *require.Assertions, + cfg *config.Config, + oldSignature signature, +) (h signature) { + t.Helper() + + statz := stats.New() + f, err := NewCompositeFormatter(cfg, &statz, 1024) + as.NoError(err) + + newHash, err := f.signature() + as.NoError(err) + as.NotEqual(oldSignature, newHash, "hash should have changed") + + sameHash, err := f.signature() + as.NoError(err) + as.Equal(newHash, sameHash, "hash should not have changed") + + return newHash +} diff --git a/format/scheduler.go b/format/scheduler.go new file mode 100644 index 00000000..86922427 --- /dev/null +++ b/format/scheduler.go @@ -0,0 +1,260 @@ +package format + +import ( + "bytes" + "cmp" + "context" + "crypto/md5" //nolint:gosec + "fmt" + "runtime" + "slices" + "strings" + "sync/atomic" + "time" + + "github.com/charmbracelet/log" + "github.com/numtide/treefmt/stats" + "github.com/numtide/treefmt/walk" + "golang.org/x/sync/errgroup" +) + +type ( + // batch represents a collection of File pointers to be processed together. + batch []*walk.File + + // batchKey represents the unique sequence of formatters to be applied to a batch of files. + // For example, "deadnix:statix:nixpkgs-fmt" indicates that deadnix should be applied first, statix second and + // nixpkgs-fmt third. + // Files are batched based on their formatting sequence, as determined by the priority and includes/excludes in the + // formatter configuration. + batchKey string + + // signature is a sha256 hash of a sequence of formatters. + signature []byte +) + +// sequence returns the list of formatters, by name, to be applied to a batch of files. +func (b batchKey) sequence() []string { + return strings.Split(string(b), batchKeySeparator) +} + +// newBatchKey takes a list of Formatters and returns a batchKey string composed of their names joined by ":". +func newBatchKey(formatters []*Formatter) batchKey { + components := make([]string, 0, len(formatters)) + for _, f := range formatters { + components = append(components, f.Name()) + } + + return batchKey(strings.Join(components, batchKeySeparator)) +} + +type scheduler struct { + batchSize int + changeLevel log.Level + formatters map[string]*Formatter + + eg *errgroup.Group + stats *stats.Stats + + batches map[batchKey]batch + signatures map[batchKey]signature + + // formatError indicates if at least one formatting error occurred + formatError *atomic.Bool +} + +func (s *scheduler) formattersSignature(key batchKey, formatters []*Formatter) ([]byte, error) { + sig, ok := s.signatures[key] + if ok { + // return pre-computed signature + return sig, nil + } + + // generate a signature by hashing each formatter in order + h := md5.New() //nolint:gosec + for _, f := range formatters { + if err := f.Hash(h); err != nil { + return nil, fmt.Errorf("failed to hash formatter %s: %w", f.Name(), err) + } + } + + sig = h.Sum(nil) + + // store we don't have to re-compute for each file + s.signatures[key] = sig + + return sig, nil +} + +func (s *scheduler) submit( + ctx context.Context, + file *walk.File, + matches []*Formatter, +) (accepted bool, err error) { + slices.SortFunc(matches, formatterSortFunc) + + // construct a batch key based on the sequence of formatters + key := newBatchKey(matches) + + // get format signature + formattersSig, err := s.formattersSignature(key, matches) + if err != nil { + return false, fmt.Errorf("failed to get formatter's signature: %w", err) + } + + // calculate the overall signature + signature, err := file.FormatSignature(formattersSig) + if err != nil { + return false, fmt.Errorf("failed to calculate file signature: %w", err) + } + + // compare signature with last cache entry + if bytes.Equal(signature, file.CachedFormatSignature) { + // If the signature is the same as the last cache entry, there is nothing to do. + // We know from the hash signature that we have already applied this sequence of formatters (and their config) to + // this file. + // When we applied the formatters, the file had the same mod time and file size. + return false, nil + } + + // append the formatters sig to the file + // it will be necessary later to calculate a new format signature + file.FormattersSignature = formattersSig + + // append to the batch + s.batches[key] = append(s.batches[key], file) + + // schedule the batch for processing if it's full + if len(s.batches[key]) == s.batchSize { + s.schedule(ctx, key, s.batches[key]) + // reset the batch + s.batches[key] = make([]*walk.File, 0, s.batchSize) + } + + return true, nil +} + +// schedule begins processing a batch in the background. +func (s *scheduler) schedule(ctx context.Context, key batchKey, batch []*walk.File) { + s.eg.Go(func() error { + var formatErrors []error + + // apply the formatters in sequence + for _, name := range key.sequence() { + formatter := s.formatters[name] + + if err := formatter.Apply(ctx, batch); err != nil { + formatErrors = append(formatErrors, err) + } + } + + // record if a format error occurred + hasErrors := len(formatErrors) > 0 + + // update overall error tracking + s.formatError.Store(hasErrors) + + if !hasErrors { + // record that the file was formatted + s.stats.Add(stats.Formatted, len(batch)) + } + + // Create a release context. + // We set no-cache based on whether any formatting errors occurred in this batch. + // This is to communicate with any caching layer, if used when reading files for this batch, that it should not + // update the state of any file in this batch, as we want to re-process them in later invocations. + releaseCtx := walk.SetNoCache(ctx, hasErrors) + + // post-processing + for _, file := range batch { + // check if the file has changed + changed, newInfo, err := file.Stat() + if err != nil { + return fmt.Errorf("failed to stat file: %w", err) + } + + if changed { + // record the change + s.stats.Add(stats.Changed, 1) + + // log the change (useful for diagnosing issues) + log.Log( + s.changeLevel, "file has changed", + "path", file.RelPath, + "prev_size", file.Info.Size(), + "prev_mod_time", file.Info.ModTime().Truncate(time.Second), + "current_size", newInfo.Size(), + "current_mod_time", newInfo.ModTime().Truncate(time.Second), + ) + + // record the new file info + file.FormattedInfo = newInfo + } + + // release the file as there is no further processing to be done on it + if err := file.Release(releaseCtx); err != nil { + return fmt.Errorf("failed to release file: %w", err) + } + } + + return nil + }) +} + +func (s *scheduler) close(ctx context.Context) error { + // schedule any partial batches that remain + for key, batch := range s.batches { + if len(batch) > 0 { + s.schedule(ctx, key, batch) + } + } + + // wait for processing to complete + if err := s.eg.Wait(); err != nil { + return fmt.Errorf("failed to wait for formatters: %w", err) + } else if s.formatError.Load() { + return ErrFormattingFailures + } + + return nil +} + +// formatterSortFunc sorts formatters by their priority in ascending order; ties are resolved by lexicographic order of +// names. +func formatterSortFunc(a, b *Formatter) int { + // sort by priority in ascending order + priorityA := a.Priority() + priorityB := b.Priority() + + result := priorityA - priorityB + if result == 0 { + // formatters with the same priority are sorted lexicographically to ensure a deterministic outcome + result = cmp.Compare(a.Name(), b.Name()) + } + + return result +} + +func newScheduler( + statz *stats.Stats, + batchSize int, + changeLevel log.Level, + formatters map[string]*Formatter, +) *scheduler { + eg := &errgroup.Group{} + // we use a simple heuristic to avoid too much contention by limiting the concurrency to runtime.NumCPU() + eg.SetLimit(runtime.NumCPU()) + + return &scheduler{ + batchSize: batchSize, + changeLevel: changeLevel, + formatters: formatters, + + eg: eg, + stats: statz, + + batches: make(map[batchKey]batch), + signatures: make(map[batchKey]signature), + formatError: &atomic.Bool{}, + } +} diff --git a/format/task.go b/format/task.go deleted file mode 100644 index 922dc6db..00000000 --- a/format/task.go +++ /dev/null @@ -1,45 +0,0 @@ -package format - -import ( - "cmp" - "slices" - - "github.com/numtide/treefmt/walk" -) - -type Task struct { - File *walk.File - Formatters []*Formatter - BatchKey string - Errors []error -} - -func NewTask(file *walk.File, formatters []*Formatter) Task { - // sort by priority in ascending order - slices.SortFunc(formatters, func(a, b *Formatter) int { - priorityA := a.Priority() - priorityB := b.Priority() - - result := priorityA - priorityB - if result == 0 { - // formatters with the same priority are sorted lexicographically to ensure a deterministic outcome - result = cmp.Compare(a.Name(), b.Name()) - } - - return result - }) - - // construct a batch key which represents the unique sequence of formatters to be applied to file - var key string - for _, f := range formatters { - key += f.name + ":" - } - - key = key[:len(key)-1] - - return Task{ - File: file, - Formatters: formatters, - BatchKey: key, - } -} diff --git a/go.mod b/go.mod index 7612b490..c41d6525 100644 --- a/go.mod +++ b/go.mod @@ -8,13 +8,14 @@ require ( github.com/charmbracelet/log v0.4.0 github.com/gobwas/glob v0.2.3 github.com/otiai10/copy v1.14.0 + github.com/rogpeppe/go-internal v1.12.0 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 - github.com/vmihailenco/msgpack/v5 v5.4.1 go.etcd.io/bbolt v1.3.11 golang.org/x/sync v0.8.0 + golang.org/x/sys v0.25.0 mvdan.cc/sh/v3 v3.9.0 ) @@ -43,11 +44,9 @@ require ( github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect - github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect - golang.org/x/sys v0.25.0 // indirect golang.org/x/term v0.24.0 // indirect golang.org/x/text v0.18.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/go.sum b/go.sum index 9cff6309..846246d1 100644 --- a/go.sum +++ b/go.sum @@ -99,10 +99,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= -github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= -github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= -github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= -github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= diff --git a/nix/packages/treefmt/gomod2nix.toml b/nix/packages/treefmt/gomod2nix.toml index 9f6a2cf6..71bb3cdc 100644 --- a/nix/packages/treefmt/gomod2nix.toml +++ b/nix/packages/treefmt/gomod2nix.toml @@ -70,6 +70,9 @@ schema = 3 [mod."github.com/rivo/uniseg"] version = "v0.4.7" hash = "sha256-rDcdNYH6ZD8KouyyiZCUEy8JrjOQoAkxHBhugrfHjFo=" + [mod."github.com/rogpeppe/go-internal"] + version = "v1.12.0" + hash = "sha256-qvDNCe3l84/LgrA8X4O15e1FeDcazyX91m9LmXGXX6M=" [mod."github.com/sagikazarmark/locafero"] version = "v0.4.0" hash = "sha256-7I1Oatc7GAaHgAqBFO6Tv4IbzFiYeU9bJAfJhXuWaXk=" @@ -100,12 +103,6 @@ schema = 3 [mod."github.com/subosito/gotenv"] version = "v1.6.0" hash = "sha256-LspbjTniiq2xAICSXmgqP7carwlNaLqnCTQfw2pa80A=" - [mod."github.com/vmihailenco/msgpack/v5"] - version = "v5.4.1" - hash = "sha256-pDplX6xU6UpNLcFbO1pRREW5vCnSPvSU+ojAwFDv3Hk=" - [mod."github.com/vmihailenco/tagparser/v2"] - version = "v2.0.0" - hash = "sha256-M9QyaKhSmmYwsJk7gkjtqu9PuiqZHSmTkous8VWkWY0=" [mod."go.etcd.io/bbolt"] version = "v1.3.11" hash = "sha256-SVWYZtE9TBgAo8xJSmo9DtSwuNa056N3zGvPLDJgiA8=" diff --git a/test/temp.go b/test/test.go similarity index 71% rename from test/temp.go rename to test/test.go index 274cb1dc..f243fbe7 100644 --- a/test/temp.go +++ b/test/test.go @@ -1,6 +1,7 @@ package test import ( + "fmt" "os" "testing" "time" @@ -9,6 +10,7 @@ import ( "github.com/numtide/treefmt/config" cp "github.com/otiai10/copy" "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" ) func WriteConfig(t *testing.T, path string, cfg *config.Config) { @@ -60,13 +62,20 @@ func TempFile(t *testing.T, dir string, pattern string, contents *string) *os.Fi return file } -func RecreateSymlink(t *testing.T, path string) error { +// Lutimes is a convenience wrapper for using unix.Lutimes +// TODO: this will need adapted if we support Windows. +func Lutimes(t *testing.T, path string, atime time.Time, mtime time.Time) error { t.Helper() - src, err := os.Readlink(path) + var utimes [2]unix.Timeval + utimes[0] = unix.NsecToTimeval(atime.UnixNano()) + utimes[1] = unix.NsecToTimeval(mtime.UnixNano()) - require.NoError(t, err, "failed to read symlink") - require.NoError(t, os.Remove(path), "failed to remove symlink") + // Change the timestamps of the path. If it's a symlink, it updates the symlink's timestamps, not the target's. + err := unix.Lutimes(path, utimes[0:]) + if err != nil { + return fmt.Errorf("failed to change times: %w", err) + } - return os.Symlink(src, path) + return nil } diff --git a/walk/cache/bucket.go b/walk/cache/bucket.go deleted file mode 100644 index dd2334e0..00000000 --- a/walk/cache/bucket.go +++ /dev/null @@ -1,100 +0,0 @@ -package cache - -import ( - "fmt" - - "github.com/vmihailenco/msgpack/v5" - bolt "go.etcd.io/bbolt" -) - -const ( - bucketPaths = "paths" - bucketFormatters = "formatters" -) - -var ErrKeyNotFound = fmt.Errorf("key not found") - -type Bucket[V any] struct { - bucket *bolt.Bucket -} - -func (b *Bucket[V]) Size() int { - return b.bucket.Stats().KeyN -} - -func (b *Bucket[V]) Get(key string) (*V, error) { - bytes := b.bucket.Get([]byte(key)) - if bytes == nil { - return nil, ErrKeyNotFound - } - - var value V - if err := msgpack.Unmarshal(bytes, &value); err != nil { - return nil, fmt.Errorf("failed to unmarshal cache entry for key '%v': %w", key, err) - } - - return &value, nil -} - -func (b *Bucket[V]) Put(key string, value *V) error { - if bytes, err := msgpack.Marshal(value); err != nil { - return fmt.Errorf("failed to marshal cache entry for key %v: %w", key, err) - } else if err = b.bucket.Put([]byte(key), bytes); err != nil { - return fmt.Errorf("failed to put cache entry for key %v: %w", key, err) - } - - return nil -} - -func (b *Bucket[V]) Delete(key string) error { - return b.bucket.Delete([]byte(key)) -} - -func (b *Bucket[V]) DeleteAll() error { - c := b.bucket.Cursor() - for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() { - if err := c.Delete(); err != nil { - return fmt.Errorf("failed to remove cache entry for key %s: %w", string(k), err) - } - } - - return nil -} - -func (b *Bucket[V]) ForEach(f func(string, *V) error) error { - return b.bucket.ForEach(func(key, bytes []byte) error { - var value V - if err := msgpack.Unmarshal(bytes, &value); err != nil { - return fmt.Errorf("failed to unmarshal cache entry for key '%v': %w", key, err) - } - - return f(string(key), &value) - }) -} - -func BucketPaths(tx *bolt.Tx) (*Bucket[Entry], error) { - return cacheBucket(bucketPaths, tx) -} - -func BucketFormatters(tx *bolt.Tx) (*Bucket[Entry], error) { - return cacheBucket(bucketFormatters, tx) -} - -func cacheBucket(name string, tx *bolt.Tx) (*Bucket[Entry], error) { - var ( - err error - b *bolt.Bucket - ) - - if tx.Writable() { - b, err = tx.CreateBucketIfNotExists([]byte(name)) - } else { - b = tx.Bucket([]byte(name)) - } - - if err != nil { - return nil, fmt.Errorf("failed to get/create bucket %s: %w", bucketPaths, err) - } - - return &Bucket[Entry]{b}, nil -} diff --git a/walk/cache/cache.go b/walk/cache/cache.go index fba706f0..fc6c94fc 100644 --- a/walk/cache/cache.go +++ b/walk/cache/cache.go @@ -1,24 +1,18 @@ package cache import ( - "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "fmt" - "io/fs" "time" "github.com/adrg/xdg" bolt "go.etcd.io/bbolt" ) -type Entry struct { - Size int64 - Modified time.Time -} - -func (e *Entry) HasChanged(info fs.FileInfo) bool { - return !(e.Modified == info.ModTime() && e.Size == info.Size()) -} +const ( + bucketPaths = "paths" +) func Open(root string) (*bolt.DB, error) { var ( @@ -29,7 +23,7 @@ func Open(root string) (*bolt.DB, error) { // Otherwise, the database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/.db`, where is // determined by hashing the treeRoot path. // This associates a given treeRoot with a given instance of the cache. - digest := sha1.Sum([]byte(root)) //nolint:gosec + digest := sha256.Sum256([]byte(root)) name := hex.EncodeToString(digest[:]) if path, err = xdg.CacheFile(fmt.Sprintf("treefmt/eval-cache/%v.db", name)); err != nil { @@ -42,29 +36,36 @@ func Open(root string) (*bolt.DB, error) { return nil, err } + // ensure bucket exist + err = db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists([]byte(bucketPaths)) + + return err + }) + if err != nil { + return nil, fmt.Errorf("failed to create bucket: %w", err) + } + return db, nil } -func EnsureBuckets(db *bolt.DB) error { - // force creation of buckets if they don't already exist - return db.Update(func(tx *bolt.Tx) error { - if _, err := BucketPaths(tx); err != nil { - return err - } +func PathsBucket(tx *bolt.Tx) *bolt.Bucket { + return tx.Bucket([]byte("paths")) +} - _, err := BucketFormatters(tx) +func deleteAll(bucket *bolt.Bucket) error { + c := bucket.Cursor() + for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() { + if err := c.Delete(); err != nil { + return fmt.Errorf("failed to remove cache entry for key %s: %w", string(k), err) + } + } - return err - }) + return nil } func Clear(db *bolt.DB) error { return db.Update(func(tx *bolt.Tx) error { - bucket, err := BucketPaths(tx) - if err != nil { - return fmt.Errorf("failed to get paths bucket: %w", err) - } - - return bucket.DeleteAll() + return deleteAll(PathsBucket(tx)) }) } diff --git a/walk/cached.go b/walk/cached.go index cfecc902..4f39037c 100644 --- a/walk/cached.go +++ b/walk/cached.go @@ -52,20 +52,17 @@ func (c *CachedReader) process() error { } return c.db.Update(func(tx *bolt.Tx) error { - // get the paths bucket - bucket, err := cache.BucketPaths(tx) - if err != nil { - return fmt.Errorf("failed to get bucket: %w", err) - } + bucket := cache.PathsBucket(tx) - // for each file in the batch, add a new cache entry with update size and mod time. + // for each file in the batch, calculate its new format signature and update the bucket entry for _, file := range batch { - entry := &cache.Entry{ - Size: file.Info.Size(), - Modified: file.Info.ModTime(), + signature, err := file.NewFormatSignature() + if err != nil { + return fmt.Errorf("failed to calculate signature for path %s: %w", file.RelPath, err) } - if err = bucket.Put(file.RelPath, entry); err != nil { - return fmt.Errorf("failed to put entry for path %s: %w", file.RelPath, err) + + if err := bucket.Put([]byte(file.RelPath), signature); err != nil { + return fmt.Errorf("failed to put format signature for path %s: %w", file.RelPath, err) } } @@ -92,7 +89,8 @@ func (c *CachedReader) process() error { func (c *CachedReader) Read(ctx context.Context, files []*File) (n int, err error) { err = c.db.View(func(tx *bolt.Tx) error { // get paths bucket - bucket, err := cache.BucketPaths(tx) + bucket := cache.PathsBucket(tx) + if err != nil { return fmt.Errorf("failed to get bucket: %w", err) } @@ -104,13 +102,7 @@ func (c *CachedReader) Read(ctx context.Context, files []*File) (n int, err erro for i := 0; i < n; i++ { file := files[i] - // lookup cache entry and append to the file - var bucketErr error - - file.Cache, bucketErr = bucket.Get(file.RelPath) - if !(bucketErr == nil || errors.Is(bucketErr, cache.ErrKeyNotFound)) { - return bucketErr - } + file.CachedFormatSignature = bucket.Get([]byte(file.RelPath)) // set a release function which inserts this file into the update channel file.AddReleaseFunc(func(ctx context.Context) error { @@ -145,19 +137,13 @@ func (c *CachedReader) Close() error { // NewCachedReader creates a cache Reader instance, backed by a bolt DB and delegating reads to delegate. func NewCachedReader(db *bolt.DB, batchSize int, delegate Reader) (*CachedReader, error) { - // force the creation of the necessary buckets if we're dealing with an empty db - if err := cache.EnsureBuckets(db); err != nil { - return nil, fmt.Errorf("failed to create cache buckets: %w", err) - } - - // create an error group for managing the processing loop - eg := &errgroup.Group{} + eg := &errgroup.Group{} // create an error group for managing the processing loop r := &CachedReader{ db: db, batchSize: batchSize, delegate: delegate, - log: log.WithPrefix("walk[cache]"), + log: log.WithPrefix("walk | cache"), eg: eg, updateCh: make(chan *File, batchSize*runtime.NumCPU()), } diff --git a/walk/cached_test.go b/walk/cached_test.go deleted file mode 100644 index d74f981f..00000000 --- a/walk/cached_test.go +++ /dev/null @@ -1,137 +0,0 @@ -package walk_test - -import ( - "context" - "errors" - "io" - "os" - "path/filepath" - "testing" - "time" - - "github.com/numtide/treefmt/stats" - "github.com/numtide/treefmt/test" - "github.com/numtide/treefmt/walk" - "github.com/numtide/treefmt/walk/cache" - "github.com/stretchr/testify/require" -) - -func TestCachedReader(t *testing.T) { - as := require.New(t) - - batchSize := 1024 - tempDir := test.TempExamples(t) - - readAll := func(path string) (totalCount, newCount, changeCount int) { - statz := stats.New() - - db, err := cache.Open(tempDir) - as.NoError(err) - defer db.Close() - - delegate := walk.NewFilesystemReader(tempDir, path, &statz, batchSize) - reader, err := walk.NewCachedReader(db, batchSize, delegate) - as.NoError(err) - - for { - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - - files := make([]*walk.File, 8) - n, err := reader.Read(ctx, files) - - totalCount += n - - for idx := 0; idx < n; idx++ { - file := files[idx] - - if file.Cache == nil { - newCount++ - } else if file.Cache.HasChanged(file.Info) { - changeCount++ - } - - as.NoError(file.Release(ctx)) - } - - cancel() - - if errors.Is(err, io.EOF) { - break - } - } - - as.NoError(reader.Close()) - - return totalCount, newCount, changeCount - } - - totalCount, newCount, changeCount := readAll("") - as.Equal(32, totalCount) - as.Equal(32, newCount) - as.Equal(0, changeCount) - - // read again, should be no changes - totalCount, newCount, changeCount = readAll("") - as.Equal(32, totalCount) - as.Equal(0, newCount) - as.Equal(0, changeCount) - - // change mod times on some files and try again - // we subtract a second to account for the 1 second granularity of modtime according to POSIX - modTime := time.Now().Add(-1 * time.Second) - - as.NoError(os.Chtimes(filepath.Join(tempDir, "treefmt.toml"), time.Now(), modTime)) - as.NoError(os.Chtimes(filepath.Join(tempDir, "shell/foo.sh"), time.Now(), modTime)) - as.NoError(os.Chtimes(filepath.Join(tempDir, "haskell/Nested/Foo.hs"), time.Now(), modTime)) - - totalCount, newCount, changeCount = readAll("") - as.Equal(32, totalCount) - as.Equal(0, newCount) - as.Equal(3, changeCount) - - // create some files and try again - _, err := os.Create(filepath.Join(tempDir, "new.txt")) - as.NoError(err) - - _, err = os.Create(filepath.Join(tempDir, "fizz.go")) - as.NoError(err) - - totalCount, newCount, changeCount = readAll("") - as.Equal(34, totalCount) - as.Equal(2, newCount) - as.Equal(0, changeCount) - - // modify some files - f, err := os.OpenFile(filepath.Join(tempDir, "new.txt"), os.O_WRONLY, 0o644) - as.NoError(err) - _, err = f.Write([]byte("foo")) - as.NoError(err) - as.NoError(f.Close()) - - f, err = os.OpenFile(filepath.Join(tempDir, "fizz.go"), os.O_WRONLY, 0o644) - as.NoError(err) - _, err = f.Write([]byte("bla")) - as.NoError(err) - as.NoError(f.Close()) - - totalCount, newCount, changeCount = readAll("") - as.Equal(34, totalCount) - as.Equal(0, newCount) - as.Equal(2, changeCount) - - // read some paths within the root - totalCount, newCount, changeCount = readAll("go") - as.Equal(2, totalCount) - as.Equal(0, newCount) - as.Equal(0, changeCount) - - totalCount, newCount, changeCount = readAll("elm/src") - as.Equal(1, totalCount) - as.Equal(0, newCount) - as.Equal(0, changeCount) - - totalCount, newCount, changeCount = readAll("haskell") - as.Equal(7, totalCount) - as.Equal(0, newCount) - as.Equal(0, changeCount) -} diff --git a/walk/filesystem.go b/walk/filesystem.go index 000aa9d8..7c22e605 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -128,7 +128,7 @@ func NewFilesystemReader( eg := errgroup.Group{} r := FilesystemReader{ - log: log.WithPrefix("walk[filesystem]"), + log: log.WithPrefix("walk | filesystem"), root: root, path: path, batchSize: batchSize, diff --git a/walk/git.go b/walk/git.go index 7681d150..fd079e68 100644 --- a/walk/git.go +++ b/walk/git.go @@ -119,6 +119,6 @@ func NewGitReader( path: path, stats: statz, eg: &errgroup.Group{}, - log: log.WithPrefix("walk[git]"), + log: log.WithPrefix("walk | git"), }, nil } diff --git a/walk/walk.go b/walk/walk.go index 9480563f..8c72735a 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -2,16 +2,15 @@ package walk import ( "context" + "crypto/md5" //nolint:gosec "errors" "fmt" "io" "io/fs" "os" "path/filepath" - "time" "github.com/numtide/treefmt/stats" - "github.com/numtide/treefmt/walk/cache" bolt "go.etcd.io/bbolt" ) @@ -35,12 +34,59 @@ type File struct { RelPath string Info fs.FileInfo - // Cache is the latest entry found for this file, if one exists. - Cache *cache.Entry + // FormattedInfo is the result of os.stat after formatting the file. + FormattedInfo fs.FileInfo + + // FormattersSignature represents the sequence of formatters and their config that was applied to this file. + FormattersSignature []byte + + // CachedFormatSignature is the last FormatSignature generated for this file, retrieved from the cache. + CachedFormatSignature []byte releaseFuncs []ReleaseFunc } +func formatSignature(formattersSig []byte, info fs.FileInfo) []byte { + h := md5.New() //nolint:gosec + h.Write(formattersSig) + // add mod time and size + h.Write([]byte(fmt.Sprintf("%v %v", info.ModTime().Unix(), info.Size()))) + + return h.Sum(nil) +} + +// FormatSignature takes the file's info from when it was traversed and appends it to formattersSig, generating +// a unique format signature which encapsulates the sequence of formatters that were applied to this file and the +// outcome. +func (f *File) FormatSignature(formattersSig []byte) ([]byte, error) { + if f.Info == nil { + return nil, fmt.Errorf("file has no info") + } + + return formatSignature(formattersSig, f.Info), nil +} + +// NewFormatSignature takes the file's info after being formatted and appends it to FormattersSignature, generating +// a unique format signature which encapsulates the sequence of formatters that were applied to this file and the +// outcome. +func (f *File) NewFormatSignature() ([]byte, error) { + info := f.FormattedInfo // we start by assuming the file was formatted + if info == nil { + // if it wasn't, we fall back to the original file info from when it was first read + info = f.Info + } + + if info == nil { + // ensure info is not nil + return nil, fmt.Errorf("file has no info") + } else if f.FormattersSignature == nil { + // ensure we have a formatters signature + return nil, fmt.Errorf("file has no formatters signature") + } + + return formatSignature(f.FormattersSignature, info), nil +} + // Release calls all registered release functions for the File and returns an error if any function fails. // Accepts a context which can be used to pass parameters to the release hooks. func (f *File) Release(ctx context.Context) error { @@ -76,7 +122,7 @@ func (f *File) Stat() (changed bool, info fs.FileInfo, err error) { // Some formatters mess with the mod time (e.g. dos2unix) but not to the same precision, // triggering false positives. // We truncate everything below a second. - if f.Info.ModTime().Truncate(time.Second) != current.ModTime().Truncate(time.Second) { + if f.Info.ModTime().Unix() != current.ModTime().Unix() { return true, current, nil }