Skip to content

Commit

Permalink
Merge pull request #1 from numtide/feat/improve-stdin-and-path-proces…
Browse files Browse the repository at this point in the history
…sing

Streaming processing of paths passed in from stdin
  • Loading branch information
brianmcgee authored May 8, 2024
2 parents 306fe0f + 2e77e12 commit 9328b46
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 87 deletions.
55 changes: 44 additions & 11 deletions cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,28 @@ func updateCache(ctx context.Context) func() error {

func walkFilesystem(ctx context.Context) func() error {
return func() error {
paths := Cli.Paths
eg, ctx := errgroup.WithContext(ctx)
pathsCh := make(chan string, BatchSize)

// we read paths from stdin if the cli flag has been set and no paths were provided as cli args
if len(paths) == 0 && Cli.Stdin {
walkPaths := func() error {
defer close(pathsCh)

var idx int
for idx < len(Cli.Paths) {
select {
case <-ctx.Done():
return ctx.Err()
default:
pathsCh <- Cli.Paths[idx]
idx += 1
}
}

return nil
}

walkStdin := func() error {
defer close(pathsCh)

// determine the current working directory
cwd, err := os.Getwd()
Expand All @@ -220,20 +238,35 @@ func walkFilesystem(ctx context.Context) func() error {

// read in all the paths
scanner := bufio.NewScanner(os.Stdin)

for scanner.Scan() {
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
select {
case <-ctx.Done():
return ctx.Err()
default:
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
}
pathsCh <- path
}

// append the fully qualified path to our paths list
paths = append(paths, path)
}
return nil
}

if len(Cli.Paths) > 0 {
eg.Go(walkPaths)
} else if Cli.Stdin {
eg.Go(walkStdin)
} else {
// no explicit paths to process, so we only need to process root
pathsCh <- Cli.TreeRoot
close(pathsCh)
}

// create a filesystem walker
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, pathsCh)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}
Expand Down
4 changes: 2 additions & 2 deletions cli/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ func TestPathsArg(t *testing.T) {
// specify some explicit paths
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")
as.NoError(err)
assertStats(t, as, 4, 4, 4, 0)
assertStats(t, as, 2, 2, 2, 0)

// specify a bad path
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
Expand Down Expand Up @@ -548,7 +548,7 @@ go/main.go

_, err = cmd(t, "-C", tempDir, "--stdin")
as.NoError(err)
assertStats(t, as, 6, 6, 6, 0)
assertStats(t, as, 3, 3, 3, 0)
}

func TestDeterministicOrderingInPipeline(t *testing.T) {
Expand Down
26 changes: 5 additions & 21 deletions walk/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ package walk
import (
"context"
"io/fs"
"os"
"path/filepath"
)

type filesystemWalker struct {
root string
paths []string
root string
pathsCh chan string
}

func (f filesystemWalker) Root() string {
Expand All @@ -35,30 +34,15 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error {
return fn(&file, err)
}

if len(f.paths) == 0 {
return filepath.Walk(f.root, walkFn)
}

for _, path := range f.paths {
info, err := os.Stat(path)
if err = filepath.Walk(path, walkFn); err != nil {
return err
}

file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}

if err = fn(&file, err); err != nil {
for path := range f.pathsCh {
if err := filepath.Walk(path, walkFn); err != nil {
return err
}
}

return nil
}

func NewFilesystem(root string, paths []string) (Walker, error) {
func NewFilesystem(root string, paths chan string) (Walker, error) {
return filesystemWalker{root, paths}, nil
}
109 changes: 63 additions & 46 deletions walk/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@ package walk

import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"

"github.com/charmbracelet/log"
"github.com/go-git/go-git/v5/plumbing/format/index"

"github.com/go-git/go-git/v5"
)

type gitWalker struct {
root string
paths []string
paths chan string
repo *git.Repository
}

Expand All @@ -40,66 +38,85 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
return fmt.Errorf("failed to open git index: %w", err)
}

if len(g.paths) > 0 {
for _, path := range g.paths {
// cache in-memory whether a path is present in the git index
var cache map[string]bool

err = filepath.Walk(path, func(path string, info fs.FileInfo, err error) error {
if info.IsDir() {
return nil
}
for path := range g.paths {

relPath, err := filepath.Rel(g.root, path)
if err != nil {
return err
}
if path == g.root {
// we can just iterate the index entries
for _, entry := range idx.Entries {
select {
case <-ctx.Done():
return ctx.Err()
default:
path := filepath.Join(g.root, entry.Name)

if _, err = idx.Entry(relPath); errors.Is(err, index.ErrEntryNotFound) {
// we skip this path as it's not staged
log.Debugf("Path not found in git index, skipping: %v, %v", relPath, path)
return nil
}
// stat the file
info, err := os.Lstat(path)

file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}

if err = fn(&file, err); err != nil {
return err
}
}
}
continue
}

// otherwise we ensure the git index entries are cached and then check if they are in the git index
if cache == nil {
cache = make(map[string]bool)
for _, entry := range idx.Entries {
cache[entry.Name] = true
}
}

relPath, err := filepath.Rel(g.root, path)
if err != nil {
return fmt.Errorf("failed to find relative path for %v: %w", path, err)
}

return fn(&file, err)
})
_, ok := cache[relPath]
if !(path == g.root || ok) {
log.Debugf("path %v not found in git index, skipping", path)
continue
}

return filepath.Walk(path, func(path string, info fs.FileInfo, err error) error {
if info.IsDir() {
return nil
}

relPath, err := filepath.Rel(g.root, path)
if err != nil {
return err
}

}
} else {
for _, entry := range idx.Entries {
select {
case <-ctx.Done():
return ctx.Err()
default:
path := filepath.Join(g.root, entry.Name)

// stat the file
info, err := os.Lstat(path)

file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if _, ok := cache[relPath]; !ok {
log.Debugf("path %v not found in git index, skipping", path)
return nil
}

if err = fn(&file, err); err != nil {
return err
}
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
}

return fn(&file, err)
})
}

return nil
}

func NewGit(root string, paths []string) (Walker, error) {
func NewGit(root string, paths chan string) (Walker, error) {
repo, err := git.PlainOpen(root)
if err != nil {
return nil, fmt.Errorf("failed to open git repo: %w", err)
Expand Down
14 changes: 7 additions & 7 deletions walk/walker.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,24 +31,24 @@ type Walker interface {
Walk(ctx context.Context, fn WalkFunc) error
}

func New(walkerType Type, root string, paths []string) (Walker, error) {
func New(walkerType Type, root string, pathsCh chan string) (Walker, error) {
switch walkerType {
case Git:
return NewGit(root, paths)
return NewGit(root, pathsCh)
case Auto:
return Detect(root, paths)
return Detect(root, pathsCh)
case Filesystem:
return NewFilesystem(root, paths)
return NewFilesystem(root, pathsCh)
default:
return nil, fmt.Errorf("unknown walker type: %v", walkerType)
}
}

func Detect(root string, paths []string) (Walker, error) {
func Detect(root string, pathsCh chan string) (Walker, error) {
// for now, we keep it simple and try git first, filesystem second
w, err := NewGit(root, paths)
w, err := NewGit(root, pathsCh)
if err == nil {
return w, err
}
return NewFilesystem(root, paths)
return NewFilesystem(root, pathsCh)
}

0 comments on commit 9328b46

Please sign in to comment.