Skip to content

Commit

Permalink
Merge branch 'main' into scan-delayed-slices-in-status
Browse files Browse the repository at this point in the history
  • Loading branch information
Hexilee committed Nov 15, 2022
2 parents a927ee6 + 2731d9e commit ab83346
Show file tree
Hide file tree
Showing 26 changed files with 496 additions and 57 deletions.
5 changes: 5 additions & 0 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ func clientFlags() []cli.Flag {
Value: defaultCacheDir,
Usage: "directory paths of local cache, use colon to separate multiple paths",
},
&cli.StringFlag{
Name: "cache-mode",
Value: "0600", // only owner can read/write cache
Usage: "file permissions for cached blocks",
},
&cli.IntFlag{
Name: "cache-size",
Value: 100 << 10,
Expand Down
78 changes: 63 additions & 15 deletions cmd/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ package cmd

import (
"bufio"
"encoding/json"
"fmt"
"net"
"net/http"
_ "net/http/pprof"
"os"
"os/signal"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"syscall"
"time"
Expand Down Expand Up @@ -267,26 +270,63 @@ func registerMetaMsg(m meta.Meta, store chunk.ChunkStore, chunkConf *chunk.Confi
})
}

func prepareMp(mp string) {
// configEqual reports whether two VFS configurations describe the same mount.
//
// The comparison works on shallow copies, so neither argument is modified.
// Format.SecretKey is blanked on both sides, and Port, AttrTimeout,
// DirEntryTimeout and EntryTimeout are cleared before comparing, so
// differences in those fields are ignored. Meta, Chunk and Format are
// compared by pointed-to value rather than by pointer identity.
//
// It returns false, instead of panicking, when either config or any of its
// Meta, Chunk or Format pointers is nil (for example a config decoded from an
// incomplete JSON document).
func configEqual(a, b *vfs.Config) bool {
	if a == nil || b == nil {
		return false
	}
	// Every pointer dereferenced below must be non-nil on both sides.
	if a.Meta == nil || b.Meta == nil ||
		a.Chunk == nil || b.Chunk == nil ||
		a.Format == nil || b.Format == nil {
		return false
	}

	ac, bc := *a, *b

	// Compare formats by value, ignoring the secret key.
	aFormat, bFormat := *ac.Format, *bc.Format
	aFormat.SecretKey, bFormat.SecretKey = "", ""

	// Clear pointer fields (compared separately by value below) and the
	// fields that are deliberately excluded from the comparison.
	ac.Meta, ac.Chunk, ac.Format, ac.Port, ac.AttrTimeout, ac.DirEntryTimeout, ac.EntryTimeout = nil, nil, nil, nil, 0, 0, 0
	bc.Meta, bc.Chunk, bc.Format, bc.Port, bc.AttrTimeout, bc.DirEntryTimeout, bc.EntryTimeout = nil, nil, nil, nil, 0, 0, 0

	return *a.Meta == *b.Meta && *a.Chunk == *b.Chunk && aFormat == bFormat && ac == bc
}

func prepareMp(newCfg *vfs.Config, mp string) (ignore bool) {
fi, err := os.Stat(mp)
if !strings.Contains(mp, ":") && err != nil {
if err != nil {
if strings.Contains(mp, ":") {
// Windows path, users should inspect mount point by themselves
return
}
if err := os.MkdirAll(mp, 0777); err != nil {
if os.IsExist(err) {
// a broken mount point, umount it
// a broken mount point, umount it and continue to mount
_ = doUmount(mp, true)
} else {
logger.Fatalf("create %s: %s", mp, err)
return
}
logger.Fatalf("create %s: %s", mp, err)
}
} else if err == nil {
ino, _ := utils.GetFileInode(mp)
if ino <= uint64(meta.RootInode) && fi.Size() == 0 {
// a broken mount point, umount it
_ = doUmount(mp, true)
} else if ino == uint64(meta.RootInode) {
logger.Warnf("%s is already mounted by juicefs, maybe you should umount it first.", mp)
}
return
}
if fi.Size() == 0 {
// a broken mount point, umount it and continue to mount
_ = doUmount(mp, true)
return
}

ino, _ := utils.GetFileInode(mp)
if ino != uint64(meta.RootInode) {
// not a mount point, just mount it
return
}

contents, err := os.ReadFile(path.Join(mp, ".config"))
if err != nil {
// failed to read juicefs config, continue to mount
return
}

originConfig := vfs.Config{}
if err = json.Unmarshal(contents, &originConfig); err != nil {
// not a valid juicefs config, continue to mount
return
}

if !configEqual(newCfg, &originConfig) {
// not the same juicefs, continue to mount
return
}

logger.Warnf("%s is already mounted by the same juicefs, ignored", mp)
return true
}

func getMetaConf(c *cli.Context, mp string, readOnly bool) *meta.Config {
Expand All @@ -312,6 +352,11 @@ func getMetaConf(c *cli.Context, mp string, readOnly bool) *meta.Config {
}

func getChunkConf(c *cli.Context, format *meta.Format) *chunk.Config {
cm, err := strconv.ParseUint(c.String("cache-mode"), 8, 32)
if err != nil {
logger.Warnf("Invalid cache-mode %s, using default value 0600", c.String("cache-mode"))
cm = 0600
}
chunkConf := &chunk.Config{
BlockSize: format.BlockSize * 1024,
Compress: format.Compression,
Expand All @@ -332,7 +377,7 @@ func getChunkConf(c *cli.Context, format *meta.Format) *chunk.Config {
CacheDir: c.String("cache-dir"),
CacheSize: int64(c.Int("cache-size")),
FreeSpace: float32(c.Float64("free-space-ratio")),
CacheMode: os.FileMode(0600),
CacheMode: os.FileMode(cm),
CacheFullBlock: !c.Bool("cache-partial-only"),
CacheChecksum: c.String("verify-cache-checksum"),
CacheScanInterval: duration(c.String("cache-scan-interval")),
Expand Down Expand Up @@ -500,7 +545,6 @@ func mount(c *cli.Context) error {
addr := c.Args().Get(0)
mp := c.Args().Get(1)

prepareMp(mp)
metaConf := getMetaConf(c, mp, c.Bool("read-only") || utils.StringContains(strings.Split(c.String("o"), ","), "ro"))
metaConf.CaseInsensi = strings.HasSuffix(mp, ":") && runtime.GOOS == "windows"
metaCli := meta.NewClient(addr, metaConf)
Expand Down Expand Up @@ -538,6 +582,10 @@ func mount(c *cli.Context) error {
registerMetaMsg(metaCli, store, chunkConf)

vfsConf := getVfsConf(c, metaConf, format, chunkConf)
ignore := prepareMp(vfsConf, mp)
if !c.Bool("force") && ignore {
return nil
}

if c.Bool("background") && os.Getenv("JFS_FOREGROUND") == "" {
daemonRun(c, addr, vfsConf, metaCli)
Expand Down
4 changes: 4 additions & 0 deletions cmd/mount_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ func mount_flags() []cli.Flag {
Name: "enable-ioctl",
Usage: "enable ioctl (support GETFLAGS/SETFLAGS only)",
},
&cli.BoolFlag{
Name: "force",
Usage: "force to mount even if the mount point is already mounted by the same filesystem",
},
&cli.BoolFlag{
Name: "update-fstab",
Usage: "add / update entry in /etc/fstab, will create a symlink at /sbin/mount.juicefs if not existing",
Expand Down
4 changes: 4 additions & 0 deletions docs/en/administration/metadata_dump_load.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ Starting with JuiceFS v1.0.0, the client automatically backs up metadata and cop

The backup files are stored in the `meta` directory of the object storage. It is a separate directory from the data store and not visible in the mount point and does not interact with the data store, and the directory can be viewed and managed using the file browser of the object storage.

:::tip
Automatic backups use Gzip compression; the backup files end with `.gz` and need to be decompressed with the `gzip -d` command before use.
:::

![](../images/meta-auto-backup-list.png)

By default, the JuiceFS client backs up metadata once an hour. The frequency of automatic backups can be adjusted by the `--backup-meta` option when mounting the filesystem, for example, to set the auto-backup to be performed every 8 hours.
Expand Down
1 change: 1 addition & 0 deletions docs/en/getting-started/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
sidebar_label: Quick Start (Standalone Mode)
sidebar_position: 2
slug: /quick_start_guide
pagination_next: getting-started/for_distributed
---

# Quick Start Guide for Standalone Mode
Expand Down
1 change: 1 addition & 0 deletions docs/en/getting-started/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
sidebar_label: Installation
sidebar_position: 1
slug: /installation
pagination_prev: introduction/comparison/juicefs_vs_s3ql
---

# Installation
Expand Down
21 changes: 11 additions & 10 deletions docs/en/introduction/README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
---
title: Introduction to JuiceFS
sidebar_label: Introduction to JuiceFS
sidebar_position: 1
slug: .
pagination_next: introduction/architecture
---

# Introduction

**JuiceFS** is a high-performance shared file system designed for cloud-native use and released under the Apache License 2.0. It provides full [POSIX](https://en.wikipedia.org/wiki/POSIX) compatibility, allowing almost all kinds of object storage to be used locally as massive local disks and to be mounted and read on different cross-platform and cross-region hosts at the same time.
**JuiceFS** is an open source, high-performance distributed file system designed for the cloud, released under the Apache License 2.0. It provides full [POSIX](https://en.wikipedia.org/wiki/POSIX) compatibility, allowing almost all kinds of object storage to be used locally as massive local disks and to be mounted and read on different cross-platform and cross-region hosts at the same time.

JuiceFS implements a distributed file system by adopting the architecture that separates "data" and "metadata" storage. When using JuiceFS to store data, the data itself is persisted in [object storage](../guide/how_to_set_up_object_storage.md#supported-object-storage) (e.g. Amazon S3), and the corresponding metadata can be persisted in various [databases](../guide/how_to_set_up_metadata_engine.md) such as Redis, MySQL, TiKV, SQLite, etc., based on the scenarios and requirements.
JuiceFS separates "data" and "metadata" storage: files are split into chunks and stored in [object storage](../guide/how_to_set_up_object_storage.md#supported-object-storage) such as Amazon S3, while the corresponding metadata can be stored in various [databases](../guide/how_to_set_up_metadata_engine.md) such as Redis, MySQL, TiKV, SQLite, etc., based on the scenarios and requirements.

JuiceFS provides rich APIs for various forms of data management, analysis, archiving, and backup. It can seamlessly interface with big data, machine learning, artificial intelligence and other application platforms without modifying code, and provide massive, elastic and high-performance storage at low cost. With JuiceFS, you do not need to worry about availability, disaster recovery, monitoring and expansion, and thus operation and maintaince work can be remarkably simplified, which helps companies focus more on business development and R&D efficiency improvement.
JuiceFS provides rich APIs for various forms of data management, analysis, archiving, and backup. It can seamlessly interface with big data, machine learning, artificial intelligence and other application platforms without modifying code, and provide massive, elastic and high-performance storage at low cost. With JuiceFS, you do not need to worry about availability, disaster recovery, monitoring and expansion, and thus maintenance work can be greatly reduced, which makes it a good fit for DevOps.

## Features

1. **POSIX Compatible** JuiceFS can be used like a local file system as it seamlessly interfaces with existing applications.
1. **POSIX Compatible**: JuiceFS can be used like a local file system, making it easy to integrate with existing applications.
2. **HDFS Compatible**: JuiceFS is fully compatible with [HDFS API](../deployment/hadoop_java_sdk.md), which can enhance metadata performance.
3. **S3 Compatible**: JuiceFS provides [S3 gateway](../deployment/s3_gateway.md) to implement an S3-compatible access interface.
4. **Cloud-Native**: It is easy to use JuiceFS in Kubernetes via [CSI Driver](../deployment/how_to_use_on_kubernetes.md).
Expand All @@ -27,13 +28,13 @@ JuiceFS provides rich APIs for various forms of data management, analysis, archi

## Scenarios

JuiceFS is designed for massive data storage and can be used as an alternative to many distributed file systems and network file systems, especially for the following scenarios.
JuiceFS is designed for massive data storage and can be used as an alternative to many distributed file systems and network file systems, especially for the following scenarios:

- **Big Data Analytics**: compatible with HDFS without requiring extra API; seamlessly integrated with mainstream computing engines (Spark, Presto, Hive, etc.); unlimited storage space; nearly zero operation and maintenance costs; well-developed caching mechanism, and better performance than object storage.
- **Machine Learning**: compatible with POSIX, supporting all machine learning and deep learning frameworks; shareable file storage, which can improve the efficiency of team management and data use.
- **Persistent volumes in container clusters**: supporting Kubernetes CSI; persistent storage and independent of container lifetime; strong consistency to ensure that date stored is correct; take over data storage requirements to ensure statelessness of the service.
- **Big Data**: JuiceFS is compatible with HDFS and can be seamlessly integrated with mainstream computing engines (Spark, Presto, Hive, etc.), bringing much better performance than directly using object storage.
- **Machine Learning**: JuiceFS is compatible with POSIX, and supports all machine learning and deep learning frameworks. As a shareable file storage, JuiceFS can improve the efficiency of team management and data usage.
- **Kubernetes**: JuiceFS supports Kubernetes CSI, providing decoupled persistent storage for pods so that your application can be stateless; it is also great for sharing data among containers.
- **Shared Workspace**: JuiceFS file system can be mounted on any host; no restrictions to client concurrent read/write; POSIX compatible with existing data flow and scripting operations.
- **Data Backup**: Back up all kinds of data in scalable storage space without limitation; combined with the shared mount feature, data from multiple hosts can be aggregated into one place and then backed up together.
- **Data Backup**: Back up all kinds of data in scalable storage space without limitation; combined with the shared mount feature, data from multiple hosts can be aggregated into one place and then backed up together.

## Data Privacy

Expand Down
4 changes: 4 additions & 0 deletions docs/zh_cn/administration/metadata_dump_load.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ juicefs dump redis://192.168.1.6:6379/1 meta.dump --subdir /path/in/juicefs

备份的文件存储在对象存储的 `meta` 目录中,它是一个独立于数据存储的目录,在挂载点中不可见,也不会与数据存储之间产生影响,用对象存储的文件浏览器即可查看和管理。

:::tip 提示
自动备份采用 Gzip 压缩,备份的文件以 `.gz` 结尾,使用时需要先用 `gzip -d` 命令解压。
:::

![](../images/meta-auto-backup-list.png)

默认情况下,JuiceFS 客户端每小时备份一次元数据,自动备份的频率可以在挂载文件系统时通过 `--backup-meta` 选项进行调整,例如,要设置为每 8 个小时执行一次自动备份:
Expand Down
Loading

0 comments on commit ab83346

Please sign in to comment.