From 147dcdbcd69426d17deb6a8247e1b4d08b33ffea Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Tue, 24 Sep 2024 10:55:35 +0100 Subject: [PATCH] fix: path handling and checking git index - validates that all path arguments exist and are contained within the tree root - fixes a bug with git index checking, where we were ignoring directories Signed-off-by: Brian McGee --- cli/format.go | 28 ++++++++- cli/format_test.go | 43 ++++++++++++- walk/git.go | 151 +++++++++++++++++++++++++++++++-------------- walk/git_test.go | 31 ++++++++++ 4 files changed, 203 insertions(+), 50 deletions(-) create mode 100644 walk/git_test.go diff --git a/cli/format.go b/cli/format.go index 13b624d1..d1f5e8d3 100644 --- a/cli/format.go +++ b/cli/format.go @@ -10,6 +10,7 @@ import ( "path/filepath" "runtime" "runtime/pprof" + "strings" "syscall" "time" @@ -35,6 +36,9 @@ func (f *Format) Run() (err error) { // set log level and other options f.configureLogging() + // initialise stats collection + stats.Init() + // ci mode if f.Ci { f.NoCache = true @@ -123,6 +127,27 @@ func (f *Format) Run() (err error) { log.Debugf("config-file=%s tree-root=%s", f.ConfigFile, f.TreeRoot) + // ensure all path arguments exist and are contained within the tree root + for _, path := range f.Paths { + relPath, err := filepath.Rel(f.TreeRoot, path) + if err != nil { + return fmt.Errorf("failed to determine relative path for %s to the tree root %s: %w", path, f.TreeRoot, err) + } + if strings.Contains(relPath, "..") { + return fmt.Errorf("path %s is outside the tree root %s", path, f.TreeRoot) + } + if f.Stdin { + // skip checking if the file exists if we are processing from stdin + // the file path is just used for matching against glob rules + continue + } + // check the path exists + _, err = os.Stat(path) + if err != nil { + return err + } + } + // read config cfg, err := config.ReadFile(f.ConfigFile, f.Formatters) if err != nil { @@ -173,9 +198,6 @@ func (f *Format) Run() (err error) { cancel() }() - // initialise stats collection - stats.Init() - // create an overall error group for executing high level tasks concurrently eg, ctx := errgroup.WithContext(ctx) diff --git a/cli/format_test.go b/cli/format_test.go index 593bfd79..53ac9e42 100644 --- a/cli/format_test.go +++ b/cli/format_test.go @@ -539,6 +539,42 @@ func TestGitWorktree(t *testing.T) { _, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem") as.NoError(err) assertStats(t, as, 60, 60, 60, 0) + + // capture current cwd, so we can replace it after the test is finished + cwd, err := os.Getwd() + as.NoError(err) + + t.Cleanup(func() { + // return to the previous working directory + as.NoError(os.Chdir(cwd)) + }) + + // format specific sub paths + _, err = cmd(t, "-C", tempDir, "-c", "go", "-vv") + as.NoError(err) + assertStats(t, as, 2, 2, 2, 0) + + _, err = cmd(t, "-C", tempDir, "-c", "go", "haskell") + as.NoError(err) + assertStats(t, as, 9, 9, 9, 0) + + _, err = cmd(t, "-C", tempDir, "-c", "go", "haskell", "ruby") + as.NoError(err) + assertStats(t, as, 10, 10, 10, 0) + + // try with a bad path + _, err = cmd(t, "-C", tempDir, "-c", "haskell", "foo") + as.ErrorContains(err, fmt.Sprintf("stat %s: no such file or directory", filepath.Join(tempDir, "foo"))) + assertStats(t, as, 0, 0, 0, 0) + + // try with a path not in the git index, e.g. it is skipped + _, err = os.Create(filepath.Join(tempDir, "foo.txt")) + as.NoError(err) + assertStats(t, as, 0, 0, 0, 0) + + _, err = cmd(t, "-C", tempDir, "-c", "foo.txt") + as.NoError(err) + assertStats(t, as, 0, 0, 0, 0) } func TestPathsArg(t *testing.T) { @@ -583,6 +619,11 @@ func TestPathsArg(t *testing.T) { // specify a bad path _, err = cmd(t, "-c", "elm/elm.json", "haskell/Nested/Bar.hs") as.ErrorContains(err, "no such file or directory") + + // specify a path outside the tree root + externalPath := filepath.Join(cwd, "go.mod") + _, err = cmd(t, "-c", externalPath) + as.ErrorContains(err, fmt.Sprintf("%s is outside the tree root %s", externalPath, tempDir)) } func TestStdIn(t *testing.T) { @@ -752,7 +793,7 @@ func TestRunInSubdir(t *testing.T) { as.NoError(err) assertStats(t, as, 32, 32, 32, 0) - // specify some explicit paths, relative to the elm/ sub-folder + // specify some explicit paths, relative to the tree root _, err = cmd(t, "-c", "elm.json", "../haskell/Nested/Foo.hs") as.NoError(err) assertStats(t, as, 2, 2, 2, 0) diff --git a/walk/git.go b/walk/git.go index 08465a80..cfd56c4b 100644 --- a/walk/git.go +++ b/walk/git.go @@ -3,16 +3,71 @@ package walk import ( "context" "fmt" + "github.com/charmbracelet/log" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/index" "io/fs" "os" "path/filepath" - - "github.com/charmbracelet/log" + "strings" "github.com/go-git/go-git/v5" - "github.com/go-git/go-git/v5/plumbing/filemode" ) +// fileTree represents a hierarchical file structure with directories and files. +type fileTree struct { + name string + entries map[string]*fileTree +} + +// add inserts a file path into the fileTree structure, creating necessary parent directories if they do not exist. +func (n *fileTree) add(path []string) { + if len(path) == 0 { + return + } else if n.entries == nil { + n.entries = make(map[string]*fileTree) + } + + name := path[0] + child, ok := n.entries[name] + if !ok { + child = &fileTree{name: name} + n.entries[name] = child + } + child.add(path[1:]) +} + +// addPath splits the given path by the filepath separator and inserts it into the fileTree structure. +func (n *fileTree) addPath(path string) { + n.add(strings.Split(path, string(filepath.Separator))) +} + +// has returns true if the specified path exists in the fileTree, false otherwise. +func (n *fileTree) has(path []string) bool { + if len(path) == 0 { + return true + } else if len(n.entries) == 0 { + return false + } + child, ok := n.entries[path[0]] + if !ok { + return false + } + return child.has(path[1:]) +} + +// hasPath splits the given path by the filepath separator and checks if it exists in the fileTree. +func (n *fileTree) hasPath(path string) bool { + return n.has(strings.Split(path, string(filepath.Separator))) +} + +// readIndex traverses the index entries and adds each file path to the fileTree structure. +func (n *fileTree) readIndex(idx *index.Index) { + for _, entry := range idx.Entries { + n.addPath(entry.Name) + } +} + type gitWalker struct { log *log.Logger root string @@ -25,12 +80,7 @@ func (g gitWalker) Root() string { return g.root } -func (g gitWalker) relPath(path string) (string, error) { - // quick optimization for the majority of use cases - if len(path) >= g.relPathOffset && path[:len(g.root)] == g.root { - return path[g.relPathOffset:], nil - } - // fallback to proper relative path resolution +func (g gitWalker) relPath(path string) (string, error) { // return filepath.Rel(g.root, path) } @@ -40,12 +90,16 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to open git index: %w", err) } - // cache in-memory whether a path is present in the git index - var cache map[string]bool + // if we need to walk a path that is not the root of the repository, we will read the directory structure of the + // git index into memory for faster lookups + var cache *fileTree for path := range g.paths { - if path == g.root { + switch path { + + case g.root: + // we can just iterate the index entries for _, entry := range idx.Entries { select { @@ -86,51 +140,56 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { } } } - continue - } - // otherwise we ensure the git index entries are cached and then check if they are in the git index - if cache == nil { - cache = make(map[string]bool) - for _, entry := range idx.Entries { - cache[entry.Name] = true - } - } - - relPath, err := filepath.Rel(g.root, path) - if err != nil { - return fmt.Errorf("failed to find relative path for %v: %w", path, err) - } - - _, ok := cache[relPath] - if !(path == g.root || ok) { - log.Debugf("path %v not found in git index, skipping", path) - continue - } + default: - return filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error { - if info.IsDir() { - return nil + // read the git index into memory if it hasn't already + if cache == nil { + cache = &fileTree{name: ""} + cache.readIndex(idx) } + // git index entries are relative to the repository root, so we need to determine a relative path for the + // one we are currently processing before checking if it exists within the git index relPath, err := g.relPath(path) if err != nil { - return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) + return fmt.Errorf("failed to find root relative path for %v: %w", path, err) } - if _, ok := cache[relPath]; !ok { - log.Debugf("path %v not found in git index, skipping", path) - return nil + if !cache.hasPath(relPath) { + log.Debugf("path %s not found in git index, skipping", relPath) + continue } - file := File{ - Path: path, - RelPath: relPath, - Info: info, - } + err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error { + // skip directories + if info.IsDir() { + return nil + } + + // determine a path relative to g.root before checking presence in the git index + relPath, err := g.relPath(path) + if err != nil { + return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) + } + + if !cache.hasPath(relPath) { + log.Debugf("path %v not found in git index, skipping", relPath) + return nil + } - return fn(&file, err) - }) + file := File{ + Path: path, + RelPath: relPath, + Info: info, + } + + return fn(&file, err) + }) + if err != nil { + return fmt.Errorf("failed to walk %s: %w", path, err) + } + } } return nil diff --git a/walk/git_test.go b/walk/git_test.go new file mode 100644 index 00000000..811fccf0 --- /dev/null +++ b/walk/git_test.go @@ -0,0 +1,31 @@ +package walk + +import ( + "github.com/stretchr/testify/require" + "testing" +) + +func TestFileTree(t *testing.T) { + + as := require.New(t) + + node := &fileTree{name: ""} + node.addPath("foo/bar/baz") + node.addPath("fizz/buzz") + node.addPath("hello/world") + node.addPath("foo/bar/fizz") + node.addPath("foo/fizz/baz") + + as.True(node.hasPath("foo")) + as.True(node.hasPath("foo/bar")) + as.True(node.hasPath("foo/bar/baz")) + as.True(node.hasPath("fizz")) + as.True(node.hasPath("fizz/buzz")) + as.True(node.hasPath("hello")) + as.True(node.hasPath("hello/world")) + as.True(node.hasPath("foo/bar/fizz")) + as.True(node.hasPath("foo/fizz/baz")) + + as.False(node.hasPath("fo")) + as.False(node.hasPath("world")) +}