diff --git a/internal/cache/cache.go b/internal/cache/cache.go
index 7e256f7d..88169f71 100644
--- a/internal/cache/cache.go
+++ b/internal/cache/cache.go
@@ -4,20 +4,23 @@ import (
 	"context"
 	"crypto/sha1"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"time"
 
+	"git.numtide.com/numtide/treefmt/internal/format"
+	"github.com/charmbracelet/log"
+
 	"github.com/adrg/xdg"
 	"github.com/vmihailenco/msgpack/v5"
 	bolt "go.etcd.io/bbolt"
 )
 
 const (
-	modifiedBucket = "modified"
+	pathsBucket      = "paths"
+	formattersBucket = "formatters"
 )
 
 // Entry represents a cache entry, indicating the last size and modified time for a file path.
@@ -33,7 +36,9 @@ var db *bolt.DB
 //
 // The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/.db`, where is determined by hashing
 // the treeRoot path. This associates a given treeRoot with a given instance of the cache.
-func Open(treeRoot string, clean bool) (err error) {
+func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
+	l := log.WithPrefix("cache")
+
 	// determine a unique and consistent db name for the tree root
 	h := sha1.New()
 	h.Write([]byte(treeRoot))
@@ -45,27 +50,84 @@ func Open(treeRoot string, clean bool) (err error) {
 		return fmt.Errorf("%w: could not resolve local path for the cache", err)
 	}
 
-	// force a clean of the cache if specified
-	if clean {
-		err := os.Remove(path)
-		if errors.Is(err, os.ErrNotExist) {
-			err = nil
-		} else if err != nil {
-			return fmt.Errorf("%w: failed to clear cache", err)
-		}
-	}
-
 	db, err = bolt.Open(path, 0o600, nil)
 	if err != nil {
 		return fmt.Errorf("%w: failed to open cache", err)
 	}
 
 	err = db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte(modifiedBucket))
-		if errors.Is(err, bolt.ErrBucketExists) {
+		// create bucket for tracking paths
+		pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
+		if err != nil {
+			return fmt.Errorf("%w: failed to create paths bucket", err)
+		}
+
+		// create bucket for tracking formatters
+		formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
+		if err != nil {
+			return fmt.Errorf("%w: failed to create formatters bucket", err)
+		}
+
+		// check for any newly configured or modified formatters
+		for name, formatter := range formatters {
+			// Lstat does not follow symlinks, so a symlinked executable is tracked via the link itself
+			stat, err := os.Lstat(formatter.Executable())
+			if err != nil {
+				return fmt.Errorf("%w: failed to stat formatter executable", err)
+			}
+
+			entry, err := getEntry(formattersBucket, name)
+			if err != nil {
+				return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
+			}
+
+			// use Equal for the mod time, as == on time.Time also compares location and monotonic clock reading
+			clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified.Equal(stat.ModTime()))
+			l.Debug(
+				"checking if formatter has changed",
+				"name", name, "clean", clean, "entry", entry, "stat", stat,
+			)
+
+			// record formatters info
+			entry = &Entry{
+				Size:     stat.Size(),
+				Modified: stat.ModTime(),
+			}
+
+			if err = putEntry(formattersBucket, name, entry); err != nil {
+				return fmt.Errorf("%w: failed to write formatter entry", err)
+			}
+		}
+
+		// check for any removed formatters; keys are copied out and deleted afterwards,
+		// as bolt does not allow a bucket to be modified from within ForEach
+		var removed [][]byte
+		if err = formattersBucket.ForEach(func(key []byte, _ []byte) error {
+			if _, ok := formatters[string(key)]; !ok {
+				removed = append(removed, append([]byte(nil), key...))
+			}
 			return nil
+		}); err != nil {
+			return fmt.Errorf("%w: failed to check for removed formatters", err)
+		}
+		for _, key := range removed {
+			if err = formattersBucket.Delete(key); err != nil {
+				return fmt.Errorf("%w: failed to remove formatter entry", err)
+			}
+			clean = true // a removed formatter requires a clean
+		}
+
+		if clean {
+			// remove all path entries
+			c := pathsBucket.Cursor()
+			for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
+				if err = c.Delete(); err != nil {
+					return fmt.Errorf("%w: failed to remove path entry", err)
+				}
+			}
 		}
-		return err
+
+		return nil
 	})
 
 	return
@@ -93,11 +155,24 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
 	}
 }
 
+// putEntry is a helper for writing cache entries into bolt.
+func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
+	data, err := msgpack.Marshal(entry)
+	if err != nil {
+		return fmt.Errorf("%w: failed to marshal cache entry", err)
+	}
+
+	if err = bucket.Put([]byte(path), data); err != nil {
+		return fmt.Errorf("%w: failed to put cache entry", err)
+	}
+	return nil
+}
+
 // ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
 // It determines if a path is new or has changed by comparing against cache entries.
 func ChangeSet(ctx context.Context, root string, pathsCh chan<- string) error {
 	return db.Update(func(tx *bolt.Tx) error {
-		bucket := tx.Bucket([]byte(modifiedBucket))
+		bucket := tx.Bucket([]byte(pathsBucket))
 
 		return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
 			if err != nil {
@@ -142,13 +217,9 @@ func Update(paths []string) (int, error) {
 	var changes int
 
 	return changes, db.Update(func(tx *bolt.Tx) error {
-		bucket := tx.Bucket([]byte(modifiedBucket))
+		bucket := tx.Bucket([]byte(pathsBucket))
 
 		for _, path := range paths {
-			if path == "" {
-				continue
-			}
-
 			cached, err := getEntry(bucket, path)
 			if err != nil {
 				return err
@@ -166,18 +237,13 @@ func Update(paths []string) (int, error) {
 				continue
 			}
 
-			cacheInfo := Entry{
+			entry := Entry{
 				Size:     pathInfo.Size(),
 				Modified: pathInfo.ModTime(),
 			}
 
-			bytes, err := msgpack.Marshal(cacheInfo)
-			if err != nil {
-				return fmt.Errorf("%w: failed to marshal mod time", err)
-			}
-
-			if err = bucket.Put([]byte(path), bytes); err != nil {
-				return fmt.Errorf("%w: failed to put mode time", err)
+			if err = putEntry(bucket, path, &entry); err != nil {
+				return err
 			}
 		}
 
diff --git a/internal/cli/format.go b/internal/cli/format.go
index 5842178b..174e61f0 100644
--- a/internal/cli/format.go
+++ b/internal/cli/format.go
@@ -2,6 +2,7 @@ package cli
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"os"
 	"os/signal"
@@ -71,7 +72,7 @@ func (f *Format) Run() error {
 		}
 
 		err = formatter.Init(name, globalExcludes)
-		if err == format.ErrFormatterNotFound && Cli.AllowMissingFormatter {
+		if errors.Is(err, format.ErrFormatterNotFound) && Cli.AllowMissingFormatter {
 			l.Debugf("formatter not found: %v", name)
 			// remove this formatter
 			delete(cfg.Formatters, name)
@@ -82,7 +83,7 @@ func (f *Format) Run() error {
 
 	ctx = format.RegisterFormatters(ctx, cfg.Formatters)
 
-	if err = cache.Open(Cli.TreeRoot, Cli.ClearCache); err != nil {
+	if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, cfg.Formatters); err != nil {
 		return err
 	}
 
@@ -110,12 +111,15 @@ func (f *Format) Run() error {
 	eg.Go(func() error {
 		batchSize := 1024
-		batch := make([]string, batchSize)
+		// start empty, with capacity for a full batch, so no blank paths are ever handed to the cache
+		batch := make([]string, 0, batchSize)
 
 		var pending, completed, changes int
 
 	LOOP:
 		for {
 			select {
+			case <-ctx.Done():
+				return ctx.Err()
 			case _, ok := <-pendingCh:
 				if ok {
 					pending += 1
diff --git a/internal/cli/format_test.go b/internal/cli/format_test.go
index 2521a933..a172755b 100644
--- a/internal/cli/format_test.go
+++ b/internal/cli/format_test.go
@@ -2,6 +2,8 @@ package cli
 
 import (
 	"fmt"
+	"os"
+	"os/exec"
 	"testing"
 
 	"git.numtide.com/numtide/treefmt/internal/test"
@@ -54,28 +56,28 @@ func TestSpecifyingFormatters(t *testing.T) {
 		},
 	})
 
-	out, err := cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir)
+	out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
 	as.Contains(string(out), "3 files changed")
 
-	out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
+	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
 	as.NoError(err)
 	as.Contains(string(out), "2 files changed")
 
-	out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
+	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
 	as.NoError(err)
 	as.Contains(string(out), "2 files changed")
 
-	out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
+	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
 	as.NoError(err)
 	as.Contains(string(out), "1 files changed")
 
 	// test bad names
 
-	out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
+	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
 	as.Errorf(err, "formatter not found in config: foo")
 
-	out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
+	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
 	as.Errorf(err, "formatter not found in config: bar")
 }
 
@@ -150,3 +152,139 @@ func TestIncludesAndExcludes(t *testing.T) {
 	as.NoError(err)
 	as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
 }
+
+func TestCache(t *testing.T) {
+	as := require.New(t)
+
+	tempDir := test.TempExamples(t)
+	configPath := tempDir + "/echo.toml"
+
+	// test without any excludes
+	config := format.Config{
+		Formatters: map[string]*format.Formatter{
+			"echo": {
+				Command:  "echo",
+				Includes: []string{"*"},
+			},
+		},
+	}
+
+	test.WriteConfig(t, configPath, config)
+	out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
+
+	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+}
+
+func TestBustCacheOnFormatterChange(t *testing.T) {
+	as := require.New(t)
+
+	tempDir := test.TempExamples(t)
+	configPath := tempDir + "/echo.toml"
+
+	// symlink some formatters into temp dir, so we can mess with their mod times
+	binPath := tempDir + "/bin"
+	as.NoError(os.Mkdir(binPath, 0o755))
+
+	binaries := []string{"black", "elm-format", "gofmt"}
+
+	for _, name := range binaries {
+		src, err := exec.LookPath(name)
+		as.NoError(err)
+		as.NoError(os.Symlink(src, binPath+"/"+name))
+	}
+
+	// prepend our test bin directory to PATH; t.Setenv restores the previous value when the test ends
+	t.Setenv("PATH", binPath+string(os.PathListSeparator)+os.Getenv("PATH"))
+
+	// start with 2 formatters
+	config := format.Config{
+		Formatters: map[string]*format.Formatter{
+			"python": {
+				Command:  "black",
+				Includes: []string{"*.py"},
+			},
+			"elm": {
+				Command:  "elm-format",
+				Options:  []string{"--yes"},
+				Includes: []string{"*.elm"},
+			},
+		},
+	}
+
+	test.WriteConfig(t, configPath, config)
+	args := []string{"--config-file", configPath, "--tree-root", tempDir}
+	out, err := cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
+
+	// tweak mod time of elm formatter
+	as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format"))
+
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
+
+	// check cache is working
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+
+	// tweak mod time of python formatter
+	as.NoError(test.RecreateSymlink(t, binPath+"/"+"black"))
+
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
+
+	// check cache is working
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+
+	// add go formatter
+	config.Formatters["go"] = &format.Formatter{
+		Command:  "gofmt",
+		Options:  []string{"-w"},
+		Includes: []string{"*.go"},
+	}
+	test.WriteConfig(t, configPath, config)
+
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 4))
+
+	// check cache is working
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+
+	// remove python formatter
+	delete(config.Formatters, "python")
+	test.WriteConfig(t, configPath, config)
+
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
+
+	// check cache is working
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+
+	// remove elm formatter
+	delete(config.Formatters, "elm")
+	test.WriteConfig(t, configPath, config)
+
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 1))
+
+	// check cache is working
+	out, err = cmd(t, args...)
+	as.NoError(err)
+	as.Contains(string(out), "0 files changed")
+}
diff --git a/internal/format/format.go b/internal/format/format.go
index c1ab0668..26f9a663 100644
--- a/internal/format/format.go
+++ b/internal/format/format.go
@@ -159,7 +159,8 @@ func (f *Formatter) apply(ctx context.Context) error {
 	start := time.Now()
 	cmd := exec.CommandContext(ctx, f.Command, args...)
 
-	if _, err := cmd.CombinedOutput(); err != nil {
-		// todo log output
+	if out, err := cmd.CombinedOutput(); err != nil {
+		// log the formatter's combined stdout/stderr to help diagnose the failure
+		f.log.Debugf("\n%s", out)
 		return err
 	}
 
diff --git a/internal/test/temp.go b/internal/test/temp.go
index c997d806..3e338ad3 100644
--- a/internal/test/temp.go
+++ b/internal/test/temp.go
@@ -36,3 +36,14 @@ func TempFile(t *testing.T, path string) *os.File {
 	}
 	return file
 }
+
+// RecreateSymlink removes the symlink at path and creates it again pointing at the same target.
+// Failing to read or remove the existing link fails the test immediately; an error from creating
+// the new link is returned to the caller.
+func RecreateSymlink(t *testing.T, path string) error {
+	t.Helper()
+	src, err := os.Readlink(path)
+	require.NoError(t, err, "failed to read symlink")
+	require.NoError(t, os.Remove(path), "failed to remove symlink")
+	return os.Symlink(src, path)
+}