Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support parallel directory summary #3340

Merged
Merged 5 commits on Mar 16, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 103 additions & 68 deletions pkg/meta/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

"github.com/juicedata/juicefs/pkg/utils"
"github.com/redis/go-redis/v9"
"golang.org/x/sync/errgroup"
)

const (
Expand Down Expand Up @@ -336,40 +337,58 @@ func GetSummary(r Meta, ctx Context, inode Ino, summary *Summary, recursive bool
if st := r.GetAttr(ctx, inode, &attr); st != 0 {
return st
}
if attr.Typ == TypeDirectory {
var entries []*Entry
if st := r.Readdir(ctx, inode, 1, &entries); st != 0 {
return st
if attr.Typ != TypeDirectory {
summary.Files++
summary.Size += uint64(align4K(attr.Length))
if attr.Typ == TypeFile {
summary.Length += attr.Length
}
for _, e := range entries {
if e.Inode == inode || len(e.Name) == 2 && bytes.Equal(e.Name, []byte("..")) {
continue
}
if e.Attr.Typ == TypeDirectory {
if recursive {
if st := GetSummary(r, ctx, e.Inode, summary, recursive); st != 0 {
return st
return 0
}
summary.Dirs++
summary.Size += uint64(align4K(0))

const concurrency = 50
dirs := []Ino{inode}
for len(dirs) > 0 {
entriesList := make([][]*Entry, len(dirs))
var eg errgroup.Group
eg.SetLimit(concurrency)
for i := range dirs {
ino := dirs[i]
entries := &entriesList[i]
eg.Go(func() error {
st := r.Readdir(ctx, ino, 1, entries)
if st != 0 && st != syscall.ENOENT {
return st
}
return nil
})
}
if err := eg.Wait(); err != nil {
return err.(syscall.Errno)
}
dirs = dirs[:0]
for _, entries := range entriesList {
for _, e := range entries {
if bytes.Equal(e.Name, []byte(".")) || bytes.Equal(e.Name, []byte("..")) {
continue
}
if e.Attr.Typ == TypeDirectory {
summary.Dirs++
summary.Size += uint64(align4K(0))
if recursive {
dirs = append(dirs, e.Inode)
}
} else {
summary.Dirs++
summary.Size += 4096
}
} else {
summary.Files++
summary.Size += uint64(align4K(e.Attr.Length))
if e.Attr.Typ == TypeFile {
summary.Length += e.Attr.Length
summary.Files++
summary.Size += uint64(align4K(e.Attr.Length))
if e.Attr.Typ == TypeFile {
summary.Length += e.Attr.Length
}
}
}
}
summary.Dirs++
summary.Size += 4096
} else {
summary.Files++
summary.Size += uint64(align4K(attr.Length))
if attr.Typ == TypeFile {
summary.Length += attr.Length
}
}
return 0
}
Expand All @@ -379,57 +398,73 @@ func FastGetSummary(r Meta, ctx Context, inode Ino, summary *Summary, recursive
if st := r.GetAttr(ctx, inode, &attr); st != 0 {
return st
}
if attr.Typ == TypeDirectory {
summary.Dirs++
return fastGetSummary(r, ctx, inode, summary, recursive)
} else {
if attr.Typ != TypeDirectory {
summary.Files++
summary.Size += uint64(align4K(attr.Length))
if attr.Typ == TypeFile {
summary.Length += attr.Length
}
}
return 0
}

func fastGetSummary(r Meta, ctx Context, inode Ino, summary *Summary, recursive bool) syscall.Errno {
st, err := r.GetDirStat(ctx, inode)
if err != nil {
return errno(err)
}
summary.Size += uint64(st.space)
summary.Length += uint64(st.length)

var attr Attr
if st := r.GetAttr(ctx, inode, &attr); st != 0 {
if st == syscall.ENOENT {
// directory is removed, ignore it
return 0
}
return st
}
if attr.Nlink == 2 {
summary.Files += uint64(st.inodes)
return 0
}
summary.Dirs++

var entries []*Entry
if st := r.Readdir(ctx, inode, 0, &entries); st != 0 {
return st
}
for _, e := range entries {
if e.Inode == inode || len(e.Name) == 2 && bytes.Equal(e.Name, []byte("..")) {
continue
}
if e.Attr.Typ == TypeDirectory {
summary.Dirs++
if recursive {
if st := fastGetSummary(r, ctx, e.Inode, summary, recursive); st != 0 {
const concurrency = 50
dirs := []Ino{inode}
for len(dirs) > 0 {
entriesList := make([][]*Entry, len(dirs))
dirStats := make([]dirStat, len(dirs))
var eg errgroup.Group
eg.SetLimit(concurrency)
for i := range dirs {
ino := dirs[i]
entries := &entriesList[i]
stat := &dirStats[i]
eg.Go(func() error {
s, err := r.GetDirStat(ctx, ino)
if err != nil {
return err
}
*stat = *s
var attr Attr
if st := r.GetAttr(ctx, ino, &attr); st != 0 && st != syscall.ENOENT {
return st
}
if attr.Nlink == 2 {
// leaf dir, no need to read entries
return nil
}
if st := r.Readdir(ctx, ino, 0, entries); st != 0 && st != syscall.ENOENT {
return st
}
return nil
})
}
if err := eg.Wait(); err != nil {
return errno(err)
}
dirs = dirs[:0]
for i, entries := range entriesList {
stat := dirStats[i]
summary.Size += uint64(stat.space)
summary.Length += uint64(stat.length)
if entries == nil {
// leaf dir
summary.Files += uint64(stat.inodes)
continue
}
for _, e := range entries {
if bytes.Equal(e.Name, []byte(".")) || bytes.Equal(e.Name, []byte("..")) {
continue
}
if e.Attr.Typ == TypeDirectory {
summary.Dirs++
if recursive {
dirs = append(dirs, e.Inode)
}
} else {
summary.Files++
}
}
} else {
summary.Files++
}
}
return 0
Expand Down