Skip to content

Commit

Permalink
feat/bust-cache-validators-change (#14)
Browse files Browse the repository at this point in the history
Tracks the mod time and size of a formatter's executable in bolt.

The cache is busted using the following criteria:

- a new formatter has been configured.
- an existing formatter has changed (mod time or size)
- an existing formatter has been removed from config

Also implemented better resolution of symlinks when determining a formatter's executable path.

Reviewed-on: https://git.numtide.com/numtide/treefmt/pulls/14
Reviewed-by: Jonas Chevalier <[email protected]>
Co-authored-by: Brian McGee <[email protected]>
Co-committed-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee authored and Brian McGee committed Jan 3, 2024
1 parent ada9a72 commit 811f883
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 39 deletions.
126 changes: 96 additions & 30 deletions internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@ import (
"context"
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"time"

"git.numtide.com/numtide/treefmt/internal/format"
"github.com/charmbracelet/log"

"github.com/adrg/xdg"
"github.com/vmihailenco/msgpack/v5"
bolt "go.etcd.io/bbolt"
)

const (
modifiedBucket = "modified"
pathsBucket = "paths"
formattersBucket = "formatters"
)

// Entry represents a cache entry, indicating the last size and modified time for a file path.
Expand All @@ -33,7 +36,9 @@ var db *bolt.DB
//
// The database will be located in `XDG_CACHE_HOME/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
// the treeRoot path. This associates a given treeRoot with a given instance of the cache.
func Open(treeRoot string, clean bool) (err error) {
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
l := log.WithPrefix("cache")

// determine a unique and consistent db name for the tree root
h := sha1.New()
h.Write([]byte(treeRoot))
Expand All @@ -45,27 +50,84 @@ func Open(treeRoot string, clean bool) (err error) {
return fmt.Errorf("%w: could not resolve local path for the cache", err)
}

// force a clean of the cache if specified
if clean {
err := os.Remove(path)
if errors.Is(err, os.ErrNotExist) {
err = nil
} else if err != nil {
return fmt.Errorf("%w: failed to clear cache", err)
}
}

db, err = bolt.Open(path, 0o600, nil)
if err != nil {
return fmt.Errorf("%w: failed to open cache", err)
}

err = db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte(modifiedBucket))
if errors.Is(err, bolt.ErrBucketExists) {
// create bucket for tracking paths
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
if err != nil {
return fmt.Errorf("%w: failed to create paths bucket", err)
}

// create bucket for tracking formatters
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
if err != nil {
return fmt.Errorf("%w: failed to create formatters bucket", err)
}

// check for any newly configured or modified formatters
for name, formatter := range formatters {

stat, err := os.Lstat(formatter.Executable())
if err != nil {
return fmt.Errorf("%w: failed to state formatter executable", err)
}

entry, err := getEntry(formattersBucket, name)
if err != nil {
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
}

clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
l.Debug(
"checking if formatter has changed",
"name", name,
"clean", clean,
"entry", entry,
"stat", stat,
)

// record formatters info
entry = &Entry{
Size: stat.Size(),
Modified: stat.ModTime(),
}

if err = putEntry(formattersBucket, name, entry); err != nil {
return fmt.Errorf("%w: failed to write formatter entry", err)
}
}

// check for any removed formatters
if err = formattersBucket.ForEach(func(key []byte, _ []byte) error {
_, ok := formatters[string(key)]
if !ok {
// remove the formatter entry from the cache
if err = formattersBucket.Delete(key); err != nil {
return fmt.Errorf("%w: failed to remove formatter entry", err)
}
// indicate a clean is required
clean = true
}
return nil
}); err != nil {
return fmt.Errorf("%w: failed to check for removed formatters", err)
}

if clean {
// remove all path entries
c := pathsBucket.Cursor()
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
if err = c.Delete(); err != nil {
return fmt.Errorf("%w: failed to remove path entry", err)
}
}
}
return err

return nil
})

return
Expand Down Expand Up @@ -93,11 +155,24 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
}
}

// putEntry serialises entry with msgpack and stores the result in bucket under the key path.
// A marshalling failure or a failed bucket write is wrapped and returned; nil is returned on success.
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
	encoded, marshalErr := msgpack.Marshal(entry)
	if marshalErr != nil {
		return fmt.Errorf("%w: failed to marshal cache entry", marshalErr)
	}

	// bolt keys are byte slices, so the path string is converted before the write.
	key := []byte(path)
	if putErr := bucket.Put(key, encoded); putErr != nil {
		return fmt.Errorf("%w: failed to put cache entry", putErr)
	}

	return nil
}

// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
// It determines if a path is new or has changed by comparing against cache entries.
func ChangeSet(ctx context.Context, root string, pathsCh chan<- string) error {
return db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))

return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if err != nil {
Expand Down Expand Up @@ -142,13 +217,9 @@ func Update(paths []string) (int, error) {
var changes int

return changes, db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))

for _, path := range paths {
if path == "" {
continue
}

cached, err := getEntry(bucket, path)
if err != nil {
return err
Expand All @@ -166,18 +237,13 @@ func Update(paths []string) (int, error) {
continue
}

cacheInfo := Entry{
entry := Entry{
Size: pathInfo.Size(),
Modified: pathInfo.ModTime(),
}

bytes, err := msgpack.Marshal(cacheInfo)
if err != nil {
return fmt.Errorf("%w: failed to marshal mod time", err)
}

if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put mode time", err)
if err = putEntry(bucket, path, &entry); err != nil {
return err
}
}

Expand Down
8 changes: 6 additions & 2 deletions internal/cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cli

import (
"context"
"errors"
"fmt"
"os"
"os/signal"
Expand Down Expand Up @@ -71,7 +72,7 @@ func (f *Format) Run() error {
}

err = formatter.Init(name, globalExcludes)
if err == format.ErrFormatterNotFound && Cli.AllowMissingFormatter {
if errors.Is(err, format.ErrFormatterNotFound) && Cli.AllowMissingFormatter {
l.Debugf("formatter not found: %v", name)
// remove this formatter
delete(cfg.Formatters, name)
Expand All @@ -82,7 +83,7 @@ func (f *Format) Run() error {

ctx = format.RegisterFormatters(ctx, cfg.Formatters)

if err = cache.Open(Cli.TreeRoot, Cli.ClearCache); err != nil {
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, cfg.Formatters); err != nil {
return err
}

Expand Down Expand Up @@ -110,12 +111,15 @@ func (f *Format) Run() error {
eg.Go(func() error {
batchSize := 1024
batch := make([]string, batchSize)
batch = batch[:0]

var pending, completed, changes int

LOOP:
for {
select {
case <-ctx.Done():
return ctx.Err()
case _, ok := <-pendingCh:
if ok {
pending += 1
Expand Down
Loading

0 comments on commit 811f883

Please sign in to comment.