Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dump: skip files in trash when backup metadata #4479

Merged
merged 3 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Details: https://juicefs.com/docs/community/metadata_dump_load`,
Name: "fast",
Usage: "speedup dump by load all metadata into memory",
},
&cli.BoolFlag{
Name: "skip-trash",
Usage: "skip files in trash",
},
},
}
}
Expand Down Expand Up @@ -105,7 +109,7 @@ func dump(ctx *cli.Context) (err error) {
if st := m.Chroot(meta.Background, metaConf.Subdir); st != 0 {
return st
}
if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast")); err != nil {
if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash")); err != nil {
return err
}
logger.Infof("Dump metadata into %s succeed", dst)
Expand Down
4 changes: 4 additions & 0 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ func metaFlags() []cli.Flag {
Value: "3600",
Usage: "interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup)",
},
&cli.BoolFlag{
Name: "backup-skip-trash",
Usage: "skip files in trash when backup metadata",
},
&cli.StringFlag{
Name: "heartbeat",
Value: "12",
Expand Down
21 changes: 11 additions & 10 deletions cmd/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,16 @@ func expandPathForEmbedded(addr string) string {

func getVfsConf(c *cli.Context, metaConf *meta.Config, format *meta.Format, chunkConf *chunk.Config) *vfs.Config {
cfg := &vfs.Config{
Meta: metaConf,
Format: *format,
Version: version.Version(),
Chunk: chunkConf,
BackupMeta: duration(c.String("backup-meta")),
Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")},
PrefixInternal: c.Bool("prefix-internal"),
Pid: os.Getpid(),
PPid: os.Getppid(),
Meta: metaConf,
Format: *format,
Version: version.Version(),
Chunk: chunkConf,
BackupMeta: duration(c.String("backup-meta")),
BackupSkipTrash: c.Bool("backup-skip-trash"),
Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")},
PrefixInternal: c.Bool("prefix-internal"),
Pid: os.Getpid(),
PPid: os.Getppid(),
}
skip_check := os.Getenv("SKIP_BACKUP_META_CHECK") == "true"
if !skip_check && cfg.BackupMeta > 0 && cfg.BackupMeta < time.Minute*5 {
Expand Down Expand Up @@ -360,7 +361,7 @@ func initBackgroundTasks(c *cli.Context, vfsConf *vfs.Config, metaConf *meta.Con
vfsConf.Port.ConsulAddr = c.String("consul")
}
if !metaConf.ReadOnly && !metaConf.NoBGJob && vfsConf.BackupMeta > 0 {
go vfs.Backup(m, blob, vfsConf.BackupMeta)
go vfs.Backup(m, blob, vfsConf.BackupMeta, vfsConf.BackupSkipTrash)
}
if !c.Bool("no-usage-report") {
go usage.ReportUsage(m, version.Version())
Expand Down
1 change: 1 addition & 0 deletions docs/en/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ Please refer to the following table to set the relevant parameters of the JuiceF
| `juicefs.no-usage-report` | `false` | Whether disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number), no user or any sensitive data will be collected. |
| `juicefs.no-bgjob` | `false` | Disable background jobs (clean-up, backup, etc.) |
| `juicefs.backup-meta` | 3600 | Interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup) |
| `juicefs.backup-skip-trash` | `false` | Skip files and directories in trash when backing up metadata. |
| `juicefs.heartbeat` | 12 | Heartbeat interval (in seconds) between client and metadata engine. It's recommended that all clients use the same value. |

#### Multiple file systems configuration
Expand Down
3 changes: 3 additions & 0 deletions docs/en/reference/command_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
|`FILE`|Export file path, if not specified, it will be exported to standard output. If the filename ends with `.gz`, it will be automatically compressed.|
|`--subdir=path`|Only export metadata for the specified subdirectory.|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd> |Export object storage authentication information, the default is `false`. Since it is exported in plain text, pay attention to data security when using it. If the export file does not contain object storage authentication information, you need to use [`juicefs config`](#config) to reconfigure object storage authentication information after the subsequent import is completed.|
|`--fast` <VersionAdd>1.2</VersionAdd>|Use more memory to speed up the dump.|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|Skip files and directories in trash.|

### `juicefs load` {#load}

Expand Down Expand Up @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0
|-|-|
|`--subdir=value`|mount a sub-directory as root (default: "")|
|`--backup-meta=3600`|interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup) (default: "3600")|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|skip files and directories in trash when backing up metadata.|
|`--heartbeat=12`|interval (in seconds) to send heartbeat; it's recommended that all clients use the same heartbeat value (default: "12")|
|`--read-only`|allow lookup/read operations only (default: false)|
|`--no-bgjob`|Disable background jobs. Defaults to false, which means clients carry out background jobs by default, including:<br/><ul><li>Clean up expired files in Trash (look for `cleanupDeletedFiles`, `cleanupTrash` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Delete slices that are not referenced (look for `cleanupSlices` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Clean up stale client sessions (look for `CleanStaleSessions` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li></ul>Note that compaction isn't affected by this option: it happens automatically with file reads and writes. The client checks whether compaction is needed and runs it in the background (take Redis for example, look for `compactChunk` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go)).|
Expand Down
1 change: 1 addition & 0 deletions docs/zh_cn/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ make win
| `juicefs.file.checksum` | `false` | DistCp 使用 `-update` 参数时,是否计算文件 Checksum |
| `juicefs.no-bgjob` | `false` | 是否关闭后台任务(清理、备份等) |
| `juicefs.backup-meta` | 3600 | 自动将 JuiceFS 元数据备份到对象存储间隔(单位:秒),设置为 0 关闭自动备份 |
| `juicefs.backup-skip-trash` | `false` | 备份元数据时忽略回收站中的文件和目录。 |
| `juicefs.heartbeat` | 12 | 客户端和元数据引擎之间的心跳间隔(单位:秒),建议所有客户端都设置一样 |

#### 多文件系统配置
Expand Down
3 changes: 3 additions & 0 deletions docs/zh_cn/reference/command_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
|`FILE`|导出文件路径,如果不指定,则会导出到标准输出。如果文件名以 `.gz` 结尾,将会自动压缩。|
|`--subdir=path`|只导出指定子目录的元数据。|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd>|导出对象存储认证信息,默认为 `false`。由于是明文导出,使用时注意数据安全。如果导出文件不包含对象存储认证信息,后续的导入完成后,需要用 [`juicefs config`](#config) 重新配置对象存储认证信息。|
|`--fast` <VersionAdd>1.2</VersionAdd>|使用更多内存来加速导出。|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|跳过回收站中的文件和目录。|

### `juicefs load` {#load}

Expand Down Expand Up @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0
|-|-|
|`--subdir=value`|挂载指定的子目录,默认挂载整个文件系统。|
|`--backup-meta=3600`|自动备份元数据到对象存储的间隔时间;单位秒,默认 3600,设为 0 表示不备份。|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|备份元数据时跳过回收站中的文件和目录。|
|`--heartbeat=12`|发送心跳的间隔(单位秒),建议所有客户端使用相同的心跳值 (默认:12)|
|`--read-only`|启用只读模式挂载。|
|`--no-bgjob`|禁用后台任务,默认为 false,也就是说客户端会默认运行后台任务。后台任务包含:<br/><ul><li>清理回收站中过期的文件(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupDeletedFiles` 和 `cleanupTrash`)</li><li>清理引用计数为 0 的 Slice(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupSlices`)</li><li>清理过期的客户端会话(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `CleanStaleSessions`)</li></ul>特别地,与[企业版](https://juicefs.com/docs/zh/cloud/guide/background-job)不同,社区版碎片合并(Compaction)不受该选项的影响,而是随着文件读写操作,自动判断是否需要合并,然后异步执行(以 Redis 为例,在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go) 中搜索 `compactChunk`)|
Expand Down
2 changes: 1 addition & 1 deletion pkg/meta/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ type Meta interface {
HandleQuota(ctx Context, cmd uint8, dpath string, quotas map[string]*Quota, strict, repair bool) error

// Dump the tree under root, which may be modified by checkRoot
DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) error
DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) error
LoadMeta(r io.Reader) error

// getBase return the base engine.
Expand Down
2 changes: 1 addition & 1 deletion pkg/meta/load_dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func testDump(t *testing.T, m Meta, root Ino, expect, result string) {
if _, err = m.Load(true); err != nil {
t.Fatalf("load setting: %s", err)
}
if err = m.DumpMeta(fp, root, false, true); err != nil {
if err = m.DumpMeta(fp, root, false, true, false); err != nil {
t.Fatalf("dump meta: %s", err)
}
cmd := exec.Command("diff", expect, result)
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -3890,7 +3890,7 @@ func (m *redisMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, dept
return nil
}

func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
if e, ok := p.(error); ok {
Expand Down Expand Up @@ -4011,7 +4011,7 @@ func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err
if err = m.dumpDir(root, tree, bw, 1, bar); err != nil {
return err
}
if root == RootInode {
if root == RootInode && !skipTrash {
trash := &DumpedEntry{
Name: "Trash",
Attr: &DumpedAttr{
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -3577,7 +3577,7 @@ func (m *dbMeta) makeSnap(ses *xorm.Session, bar *utils.Bar) error {
return nil
}

func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
if e, ok := p.(error); ok {
Expand Down Expand Up @@ -3605,7 +3605,7 @@ func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err
if tree, err = m.dumpEntry(s, root, TypeDirectory); err != nil {
return err
}
if root == 1 {
if root == 1 && !skipTrash {
if trash, err = m.dumpEntry(s, TrashInode, TypeDirectory); err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/tkv.go
Original file line number Diff line number Diff line change
Expand Up @@ -2999,7 +2999,7 @@ func (m *kvMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth i
return nil
}

func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
debug.PrintStack()
Expand Down Expand Up @@ -3115,7 +3115,7 @@ func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err
if err = m.dumpEntry(root, tree); err != nil {
return err
}
if root == 1 {
if root == 1 && !skipTrash {
trash = &DumpedEntry{
Attr: &DumpedAttr{
Inode: TrashInode,
Expand Down
8 changes: 4 additions & 4 deletions pkg/vfs/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
)

// Backup metadata periodically in the object storage
func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration, skipTrash bool) {
ctx := meta.Background
key := "lastBackup"
for {
Expand Down Expand Up @@ -65,7 +65,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
}
go cleanupBackups(blob, now)
logger.Debugf("backup metadata started")
if err = backup(m, blob, now); err == nil {
if err = backup(m, blob, now, skipTrash); err == nil {
logger.Infof("backup metadata succeed, used %s", time.Since(now))
} else {
logger.Warnf("backup metadata failed: %s", err)
Expand All @@ -74,7 +74,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
}
}

func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error {
func backup(m meta.Meta, blob object.ObjectStorage, now time.Time, skipTrash bool) error {
name := "dump-" + now.UTC().Format("2006-01-02-150405") + ".json.gz"
fp, err := os.CreateTemp("", "juicefs-meta-*")
if err != nil {
Expand All @@ -83,7 +83,7 @@ func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error {
defer os.Remove(fp.Name())
defer fp.Close()
zw := gzip.NewWriter(fp)
err = m.DumpMeta(zw, 0, false, false) // force dump the whole tree
err = m.DumpMeta(zw, 0, false, false, skipTrash) // force dump the whole tree
_ = zw.Close()
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion pkg/vfs/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func TestRotate(t *testing.T) {

func TestBackup(t *testing.T) {
v, blob := createTestVFS()
go Backup(v.Meta, blob, time.Millisecond*100)
go Backup(v.Meta, blob, time.Millisecond*100, false)
time.Sleep(time.Millisecond * 100)

blob = object.WithPrefix(blob, "meta/")
Expand Down
1 change: 1 addition & 0 deletions pkg/vfs/vfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ type Config struct {
DirEntryTimeout time.Duration
EntryTimeout time.Duration
BackupMeta time.Duration
BackupSkipTrash bool
FastResolve bool `json:",omitempty"`
AccessLog string `json:",omitempty"`
PrefixInternal bool
Expand Down
4 changes: 3 additions & 1 deletion sdk/java/libjfs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ type javaConf struct {
NoBGJob bool `json:"noBGJob"`
OpenCache float64 `json:"openCache"`
BackupMeta int64 `json:"backupMeta"`
BackupSkipTrash bool `json:"backupSkipTrash"`
Heartbeat int `json:"heartbeat"`
CacheDir string `json:"cacheDir"`
CacheSize int64 `json:"cacheSize"`
Expand Down Expand Up @@ -576,9 +577,10 @@ func jfs_init(cname, jsonConf, user, group, superuser, supergroup *C.char) int64
AccessLog: jConf.AccessLog,
FastResolve: jConf.FastResolve,
BackupMeta: time.Second * time.Duration(jConf.BackupMeta),
BackupSkipTrash: jConf.BackupSkipTrash,
}
if !jConf.ReadOnly && !jConf.NoSession && !jConf.NoBGJob && conf.BackupMeta > 0 {
go vfs.Backup(m, blob, conf.BackupMeta)
SandyXSD marked this conversation as resolved.
Show resolved Hide resolved
go vfs.Backup(m, blob, conf.BackupMeta, conf.BackupSkipTrash)
}
if !jConf.NoUsageReport && !jConf.NoSession {
go usage.ReportUsage(m, "java-sdk "+version.Version())
Expand Down
1 change: 1 addition & 0 deletions sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ public void initialize(URI uri, Configuration conf) throws IOException {
obj.put("cacheSize", Integer.valueOf(getConf(conf, "cache-size", "100")));
obj.put("openCache", Float.valueOf(getConf(conf, "open-cache", "0.0")));
obj.put("backupMeta", Integer.valueOf(getConf(conf, "backup-meta", "3600")));
obj.put("backupSkipTrash", Boolean.valueOf(getConf(conf, "backup-skip-trash", "false")));
obj.put("heartbeat", Integer.valueOf(getConf(conf, "heartbeat", "12")));
obj.put("attrTimeout", Float.valueOf(getConf(conf, "attr-cache", "0.0")));
obj.put("entryTimeout", Float.valueOf(getConf(conf, "entry-cache", "0.0")));
Expand Down
Loading