Skip to content

Commit

Permalink
fix: path handling and checking git index
Browse files Browse the repository at this point in the history
- validates that all path arguments exist and are contained within the tree root
- fixes a bug with git index checking, where we were ignoring directories

Signed-off-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee committed Sep 24, 2024
1 parent 601b122 commit 147dcdb
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 50 deletions.
28 changes: 25 additions & 3 deletions cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"strings"
"syscall"
"time"

Expand All @@ -35,6 +36,9 @@ func (f *Format) Run() (err error) {
// set log level and other options
f.configureLogging()

// initialise stats collection
stats.Init()

// ci mode
if f.Ci {
f.NoCache = true
Expand Down Expand Up @@ -123,6 +127,27 @@ func (f *Format) Run() (err error) {

log.Debugf("config-file=%s tree-root=%s", f.ConfigFile, f.TreeRoot)

// ensure all path arguments exist and are contained within the tree root
for _, path := range f.Paths {
relPath, err := filepath.Rel(f.TreeRoot, path)
if err != nil {
return fmt.Errorf("failed to determine relative path for %s to the tree root %s: %w", path, f.TreeRoot, err)
}
if strings.Contains(relPath, "..") {
return fmt.Errorf("path %s is outside the tree root %s", path, f.TreeRoot)
}
if f.Stdin {
// skip checking if the file exists if we are processing from stdin
// the file path is just used for matching against glob rules
continue
}
// check the path exists
_, err = os.Stat(path)
if err != nil {
return err
}
}

// read config
cfg, err := config.ReadFile(f.ConfigFile, f.Formatters)
if err != nil {
Expand Down Expand Up @@ -173,9 +198,6 @@ func (f *Format) Run() (err error) {
cancel()
}()

// initialise stats collection
stats.Init()

// create an overall error group for executing high level tasks concurrently
eg, ctx := errgroup.WithContext(ctx)

Expand Down
43 changes: 42 additions & 1 deletion cli/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,42 @@ func TestGitWorktree(t *testing.T) {
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
as.NoError(err)
assertStats(t, as, 60, 60, 60, 0)

// capture current cwd, so we can replace it after the test is finished
cwd, err := os.Getwd()
as.NoError(err)

t.Cleanup(func() {
// return to the previous working directory
as.NoError(os.Chdir(cwd))
})

// format specific sub paths
_, err = cmd(t, "-C", tempDir, "-c", "go", "-vv")
as.NoError(err)
assertStats(t, as, 2, 2, 2, 0)

_, err = cmd(t, "-C", tempDir, "-c", "go", "haskell")
as.NoError(err)
assertStats(t, as, 9, 9, 9, 0)

_, err = cmd(t, "-C", tempDir, "-c", "go", "haskell", "ruby")
as.NoError(err)
assertStats(t, as, 10, 10, 10, 0)

// try with a bad path
_, err = cmd(t, "-C", tempDir, "-c", "haskell", "foo")
as.ErrorContains(err, fmt.Sprintf("stat %s: no such file or directory", filepath.Join(tempDir, "foo")))
assertStats(t, as, 0, 0, 0, 0)

// try with a path not in the git index, e.g. it is skipped
_, err = os.Create(filepath.Join(tempDir, "foo.txt"))
as.NoError(err)
assertStats(t, as, 0, 0, 0, 0)

_, err = cmd(t, "-C", tempDir, "-c", "foo.txt")
as.NoError(err)
assertStats(t, as, 0, 0, 0, 0)
}

func TestPathsArg(t *testing.T) {
Expand Down Expand Up @@ -583,6 +619,11 @@ func TestPathsArg(t *testing.T) {
// specify a bad path
_, err = cmd(t, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
as.ErrorContains(err, "no such file or directory")

// specify a path outside the tree root
externalPath := filepath.Join(cwd, "go.mod")
_, err = cmd(t, "-c", externalPath)
as.ErrorContains(err, fmt.Sprintf("%s is outside the tree root %s", externalPath, tempDir))
}

func TestStdIn(t *testing.T) {
Expand Down Expand Up @@ -752,7 +793,7 @@ func TestRunInSubdir(t *testing.T) {
as.NoError(err)
assertStats(t, as, 32, 32, 32, 0)

// specify some explicit paths, relative to the elm/ sub-folder
// specify some explicit paths, relative to the tree root
_, err = cmd(t, "-c", "elm.json", "../haskell/Nested/Foo.hs")
as.NoError(err)
assertStats(t, as, 2, 2, 2, 0)
Expand Down
151 changes: 105 additions & 46 deletions walk/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,71 @@ package walk
import (
"context"
"fmt"
"github.com/charmbracelet/log"
"github.com/go-git/go-git/v5/plumbing/filemode"
"github.com/go-git/go-git/v5/plumbing/format/index"
"io/fs"
"os"
"path/filepath"

"github.com/charmbracelet/log"
"strings"

"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/filemode"
)

// fileTree represents a hierarchical file structure with directories and files.
type fileTree struct {
name string
entries map[string]*fileTree
}

// add inserts a file path into the fileTree structure, creating necessary parent directories if they do not exist.
func (n *fileTree) add(path []string) {
if len(path) == 0 {
return
} else if n.entries == nil {
n.entries = make(map[string]*fileTree)
}

name := path[0]
child, ok := n.entries[name]
if !ok {
child = &fileTree{name: name}
n.entries[name] = child
}
child.add(path[1:])
}

// addPath splits the given path by the filepath separator and inserts it into the fileTree structure.
func (n *fileTree) addPath(path string) {
n.add(strings.Split(path, string(filepath.Separator)))
}

// has returns true if the specified path exists in the fileTree, false otherwise.
func (n *fileTree) has(path []string) bool {
if len(path) == 0 {
return true
} else if len(n.entries) == 0 {
return false
}
child, ok := n.entries[path[0]]
if !ok {
return false
}
return child.has(path[1:])
}

// hasPath splits the given path by the filepath separator and checks if it exists in the fileTree.
func (n *fileTree) hasPath(path string) bool {
return n.has(strings.Split(path, string(filepath.Separator)))
}

// readIndex traverses the index entries and adds each file path to the fileTree structure.
func (n *fileTree) readIndex(idx *index.Index) {
for _, entry := range idx.Entries {
n.addPath(entry.Name)
}
}

type gitWalker struct {
log *log.Logger
root string
Expand All @@ -25,12 +80,7 @@ func (g gitWalker) Root() string {
return g.root
}

func (g gitWalker) relPath(path string) (string, error) {
// quick optimization for the majority of use cases
if len(path) >= g.relPathOffset && path[:len(g.root)] == g.root {
return path[g.relPathOffset:], nil
}
// fallback to proper relative path resolution
func (g gitWalker) relPath(path string) (string, error) { //
return filepath.Rel(g.root, path)
}

Expand All @@ -40,12 +90,16 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
return fmt.Errorf("failed to open git index: %w", err)
}

// cache in-memory whether a path is present in the git index
var cache map[string]bool
// if we need to walk a path that is not the root of the repository, we will read the directory structure of the
// git index into memory for faster lookups
var cache *fileTree

for path := range g.paths {

if path == g.root {
switch path {

case g.root:

// we can just iterate the index entries
for _, entry := range idx.Entries {
select {
Expand Down Expand Up @@ -86,51 +140,56 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
}
}
}
continue
}

// otherwise we ensure the git index entries are cached and then check if they are in the git index
if cache == nil {
cache = make(map[string]bool)
for _, entry := range idx.Entries {
cache[entry.Name] = true
}
}

relPath, err := filepath.Rel(g.root, path)
if err != nil {
return fmt.Errorf("failed to find relative path for %v: %w", path, err)
}

_, ok := cache[relPath]
if !(path == g.root || ok) {
log.Debugf("path %v not found in git index, skipping", path)
continue
}
default:

return filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
if info.IsDir() {
return nil
// read the git index into memory if it hasn't already
if cache == nil {
cache = &fileTree{name: ""}
cache.readIndex(idx)
}

// git index entries are relative to the repository root, so we need to determine a relative path for the
// one we are currently processing before checking if it exists within the git index
relPath, err := g.relPath(path)
if err != nil {
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
return fmt.Errorf("failed to find root relative path for %v: %w", path, err)
}

if _, ok := cache[relPath]; !ok {
log.Debugf("path %v not found in git index, skipping", path)
return nil
if !cache.hasPath(relPath) {
log.Debugf("path %s not found in git index, skipping", relPath)
continue
}

file := File{
Path: path,
RelPath: relPath,
Info: info,
}
err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
// skip directories
if info.IsDir() {
return nil
}

// determine a path relative to g.root before checking presence in the git index
relPath, err := g.relPath(path)
if err != nil {
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
}

if !cache.hasPath(relPath) {
log.Debugf("path %v not found in git index, skipping", relPath)
return nil
}

return fn(&file, err)
})
file := File{
Path: path,
RelPath: relPath,
Info: info,
}

return fn(&file, err)
})
if err != nil {
return fmt.Errorf("failed to walk %s: %w", path, err)
}
}
}

return nil
Expand Down
31 changes: 31 additions & 0 deletions walk/git_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package walk

import (
"github.com/stretchr/testify/require"
"testing"
)

func TestFileTree(t *testing.T) {

as := require.New(t)

node := &fileTree{name: ""}
node.addPath("foo/bar/baz")
node.addPath("fizz/buzz")
node.addPath("hello/world")
node.addPath("foo/bar/fizz")
node.addPath("foo/fizz/baz")

as.True(node.hasPath("foo"))
as.True(node.hasPath("foo/bar"))
as.True(node.hasPath("foo/bar/baz"))
as.True(node.hasPath("fizz"))
as.True(node.hasPath("fizz/buzz"))
as.True(node.hasPath("hello"))
as.True(node.hasPath("hello/world"))
as.True(node.hasPath("foo/bar/fizz"))
as.True(node.hasPath("foo/fizz/baz"))

as.False(node.hasPath("fo"))
as.False(node.hasPath("world"))
}

0 comments on commit 147dcdb

Please sign in to comment.