From 657691be4ca6beb43633dc768746a0a6d99381e1 Mon Sep 17 00:00:00 2001 From: zhijian Date: Mon, 11 Mar 2024 17:03:37 +0800 Subject: [PATCH 1/3] do not print user-defined labels in the internal file `.stat` (#4478) --- pkg/vfs/internal.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/vfs/internal.go b/pkg/vfs/internal.go index cb75e7298230..52ba913385b5 100644 --- a/pkg/vfs/internal.go +++ b/pkg/vfs/internal.go @@ -173,7 +173,7 @@ func collectMetrics(registry *prometheus.Registry) []byte { for _, m := range mf.Metric { var name = *mf.Name for _, l := range m.Label { - if *l.Name != "mp" && *l.Name != "vol_name" { + if (name == "juicefs_object_request_durations_histogram_seconds" || name == "juicefs_object_request_data_bytes") && *l.Name == "method" { name += "_" + *l.Value } } From f80dd7adc92c566d0c9abc8ea41b2262f3ae2d9d Mon Sep 17 00:00:00 2001 From: Zhou Cheng Date: Mon, 11 Mar 2024 17:47:50 +0800 Subject: [PATCH 2/3] CI: add sync tests. (#4482) --- .github/scripts/hypo/syncrand.py | 26 ++++------ .github/scripts/hypo/syncrand_test.py | 32 ++++++++++++ .github/scripts/sync/sync_fsrand.sh | 2 +- .github/scripts/sync/sync_minio.sh | 18 +++++++ .github/workflows/sync.yml | 75 +++++++++++++++++---------- 5 files changed, 109 insertions(+), 44 deletions(-) diff --git a/.github/scripts/hypo/syncrand.py b/.github/scripts/hypo/syncrand.py index ae72586db196..2785331cc4b6 100644 --- a/.github/scripts/hypo/syncrand.py +++ b/.github/scripts/hypo/syncrand.py @@ -13,22 +13,15 @@ from fs_op import FsOperation import random -st_entry_name = st.text(alphabet='abc', min_size=1, max_size=3) -st_patterns = st.text(alphabet='abc?/*', min_size=1, max_size=5).\ - filter(lambda s: s.find('***') == -1 or s.endswith('/***')) -st_patterns = st.lists(st.sampled_from(['a','?','/','*', '/***']), min_size=1, max_size=10)\ - .map(''.join).filter(lambda s: s.find('***') == -1 or (s.count('/***')==1 and s.endswith('a/***'))) +st_entry_name = st.text(alphabet='abc*?', min_size=1, max_size=4) st_patterns = st.lists(st.sampled_from(['a','?','/','*']), min_size=1, max_size=10)\ - .map(''.join).filter(lambda s: s.find('***') == -1 ) -st_patterns = st.lists(st.sampled_from(['a','?','/','*']), min_size=1, max_size=10)\ - .map(''.join).filter(lambda s: s.find('**') == -1 ) + .map(''.join).filter(lambda s: s.find('***') == -1 or (s.count('***') == 1 and s.endswith('/***'))) st_option = st.fixed_dictionaries({ "option": st.just("--include") | st.just("--exclude"), "pattern": st_patterns }) -st_options = st.lists(st_option, min_size=1, max_size=10).\ - filter(lambda self: any(item["pattern"].endswith('/***') for item in self)) + st_options = st.lists(st_option, min_size=1, max_size=10) SEED=int(os.environ.get('SEED', random.randint(0, 1000000000))) @@ -42,7 +35,7 @@ class SyncMachine(RuleBasedStateMachine): DEST_JUICESYNC = '/tmp/juicesync' log_level = os.environ.get('LOG_LEVEL', 'INFO') logger = common.setup_logger(f'./syncrand.log', 'syncrand_logger', log_level) - fsop = FsOperation(logger) + fsop = FsOperation({ROOT_DIR1: logger, ROOT_DIR2: logger}) @initialize(target=Folders) def init_folders(self): @@ -109,11 +102,12 @@ def mkdir(self, parent, subdir, mode, user='root', umask=0o022): def sync(self, options): subprocess.check_call(['rm', '-rf', self.DEST_RSYNC]) subprocess.check_call(['rm', '-rf', self.DEST_JUICESYNC]) - options = ' '.join([f'{item["option"]} {item["pattern"]}' for item in options]) - self.logger.info(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options}') - subprocess.check_call(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - self.logger.info(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options}') - subprocess.check_call(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + options_run = ' '.join([f'{item["option"]} {item["pattern"]}' for item in options]) + options_display = ' '.join([f'{item["option"]} "{item["pattern"]}"' for item in options]) + self.logger.info(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options_display}') + subprocess.check_call(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options_run}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + self.logger.info(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options_display}') + subprocess.check_call(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options_run}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) try: subprocess.check_call(['diff', '-r', self.DEST_RSYNC, self.DEST_JUICESYNC]) except subprocess.CalledProcessError as e: diff --git a/.github/scripts/hypo/syncrand_test.py b/.github/scripts/hypo/syncrand_test.py index fc0cd9176bf1..ab8b84eb0513 100644 --- a/.github/scripts/hypo/syncrand_test.py +++ b/.github/scripts/hypo/syncrand_test.py @@ -43,5 +43,37 @@ def test_sync5(self): {'option': '--exclude', 'pattern': 'a?**'}]) state.teardown() + def test_sync6(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': '**a'}]) + state.teardown() + + def test_sync7(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.create_file(content=b'', file_name='aa', mode='w', parent=v1, umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': 'aa**a'}]) + state.teardown() + + def test_sync8(self): + # SEE: https://github.com/juicedata/juicefs/issues/4471 + state = SyncMachine() + v1 = state.init_folders() + v2 = state.mkdir(mode=8, parent=v1, subdir='a', umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': 'a/**/a'}]) + state.teardown() + + def test_sync9(self): + # SEE: https://github.com/juicedata/juicefs/issues/4471 + state = SyncMachine() + v1 = state.init_folders() + v2 = state.mkdir(mode=8, parent=v1, subdir='aa', umask=0) + v3 = state.create_file(content=b'', file_name='a', mode='w', parent=v2, umask=0) + state.sync(options=[{'option': '--include', 'pattern': '**aa**'}, + {'option': '--exclude', 'pattern': 'a'}]) + state.teardown() + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/.github/scripts/sync/sync_fsrand.sh b/.github/scripts/sync/sync_fsrand.sh index bc02e6daa828..62eced0f3ac3 100755 --- a/.github/scripts/sync/sync_fsrand.sh +++ b/.github/scripts/sync/sync_fsrand.sh @@ -88,7 +88,7 @@ test_list_threads(){ check_diff $DEST_DIR1 $DEST_DIR2 } -test_update(){ +skip_test_update(){ prepare_test ./juicefs mount $META_URL /tmp/jfs -d sync_option="--dirs --perms --check-all --links --list-threads 10 --list-depth 5" diff --git a/.github/scripts/sync/sync_minio.sh b/.github/scripts/sync/sync_minio.sh index 2ac4aefcec61..bd1e398262a7 100755 --- a/.github/scripts/sync/sync_minio.sh +++ b/.github/scripts/sync/sync_minio.sh @@ -115,7 +115,25 @@ prepare_test(){ ./juicefs mount -d $META_URL /jfs lsof -i :9005 | awk 'NR!=1 {print $2}' | xargs -r kill -9 || true MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin ./juicefs gateway $META_URL localhost:9005 & + wait_gateway_ready ./mc alias set juicegw http://localhost:9005 minioadmin minioadmin --api S3v4 } +wait_gateway_ready(){ + timeout=30 + for i in $(seq 1 $timeout); do + if [[ -z $(lsof -i :9005) ]]; then + echo "$i Waiting for port 9005 to be ready..." + sleep 1 + else + echo "gateway is now ready on port 9005" + break + fi + done + if [[ -z $(lsof -i :9005) ]]; then + echo "gateway is not ready after $timeout seconds" + exit 1 + fi +} + source .github/scripts/common/run_test.sh && run_test $@ diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index af3596d5c00c..80b21e856dfa 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -33,6 +33,10 @@ on: jobs: sync: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + type: ['sync', 'sync_fsrand', 'sync_minio', 'sync_cluster', 'sync_exclude'] steps: - name: Checkout uses: actions/checkout@v3 @@ -41,40 +45,57 @@ jobs: - name: Build uses: ./.github/actions/build - # with: - # useBeta: true - name: Test Sync timeout-minutes: 30 run: | - sudo META=redis .github/scripts/sync/sync.sh - - - name: Test Sync with fsrand - timeout-minutes: 30 - run: | - sudo META=redis .github/scripts/sync/sync_fsrand.sh - - - name: Test Sync with mino - timeout-minutes: 30 - run: | - sudo META=redis .github/scripts/sync/sync_minio.sh - - - name: Test Sync with multi workers - timeout-minutes: 30 - run: | - # not supported algo: "dsa" "ecdsa-sk" "ed25519-sk" - types=("ecdsa" "ed25519" "rsa") - random_type=${types[$RANDOM % ${#types[@]}]} - sudo CI=true META=redis KEY_TYPE=$random_type .github/scripts/sync/sync_cluster.sh - - - name: Test sync include/exclude option - timeout-minutes: 30 - run: | - # sudo python3 .github/scripts/hypo/syncrand_test.py - sudo LOG_LEVEL=WARNING PROFILE=ci python3 .github/scripts/hypo/syncrand.py + if [[ "${{matrix.type}}" == 'sync' ]]; then + sudo META=redis .github/scripts/sync/sync.sh + elif [[ "${{matrix.type}}" == 'sync_fsrand' ]]; then + sudo META=redis .github/scripts/sync/sync_fsrand.sh + elif [[ "${{matrix.type}}" == 'sync_minio' ]]; then + sudo META=redis .github/scripts/sync/sync_minio.sh + elif [[ "${{matrix.type}}" == 'sync_cluster' ]]; then + types=("ecdsa" "ed25519" "rsa") + random_type=${types[$RANDOM % ${#types[@]}]} + sudo CI=true META=redis KEY_TYPE=$random_type .github/scripts/sync/sync_cluster.sh + elif [[ "${{matrix.type}}" == 'sync_exclude' ]]; then + sudo python3 .github/scripts/hypo/syncrand_test.py + if [ "${{github.event_name}}" == "pull_request" ]; then + sudo MAX_EXAMPLE=100 STEP_COUNT=50 LOG_LEVEL=WARNING PROFILE=ci python3 .github/scripts/hypo/syncrand.py + else + sudo MAX_EXAMPLE=1000 STEP_COUNT=200 LOG_LEVEL=WARNING PROFILE=ci python3 .github/scripts/hypo/syncrand.py + fi + else + echo "Unknown type: ${{matrix.type}}" + exit 1 + fi - name: Setup upterm session if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1) # if: failure() timeout-minutes: 60 uses: lhotari/action-upterm@v1 + + success-all-test: + runs-on: ubuntu-latest + needs: [sync] + if: always() + steps: + - uses: technote-space/workflow-conclusion-action@v3 + - uses: actions/checkout@v3 + + - name: Check Failure + if: env.WORKFLOW_CONCLUSION == 'failure' + run: exit 1 + + - name: Send Slack Notification + if: failure() && github.event_name != 'workflow_dispatch' + uses: juicedata/slack-notify-action@main + with: + channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}" + slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}" + + - name: Success + if: success() + run: echo "All Done" \ No newline at end of file From c4a7bd97b5069b14511fbedc9470665ecab2ded1 Mon Sep 17 00:00:00 2001 From: zhijian Date: Mon, 11 Mar 2024 17:54:00 +0800 Subject: [PATCH 3/3] metrics: export meta backup metrics (#4480) --- cmd/mount.go | 2 ++ pkg/vfs/backup.go | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/cmd/mount.go b/cmd/mount.go index c74d0b85678d..5468a881196c 100644 --- a/cmd/mount.go +++ b/cmd/mount.go @@ -360,6 +360,8 @@ func initBackgroundTasks(c *cli.Context, vfsConf *vfs.Config, metaConf *meta.Con vfsConf.Port.ConsulAddr = c.String("consul") } if !metaConf.ReadOnly && !metaConf.NoBGJob && vfsConf.BackupMeta > 0 { + registerer.MustRegister(vfs.LastBackupTimeG) + registerer.MustRegister(vfs.LastBackupDurationG) go vfs.Backup(m, blob, vfsConf.BackupMeta) } if !c.Bool("no-usage-report") { diff --git a/pkg/vfs/backup.go b/pkg/vfs/backup.go index c31ee5f6cd88..33bb5342e5ff 100644 --- a/pkg/vfs/backup.go +++ b/pkg/vfs/backup.go @@ -27,6 +27,18 @@ import ( "github.com/juicedata/juicefs/pkg/object" osync "github.com/juicedata/juicefs/pkg/sync" "github.com/juicedata/juicefs/pkg/utils" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + LastBackupTimeG = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "last_successful_backup", + Help: "Last successful backup.", + }) + LastBackupDurationG = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "last_backup_duration", + Help: "Last backup duration.", + }) ) // Backup metadata periodically in the object storage @@ -66,10 +78,12 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) { go cleanupBackups(blob, now) logger.Debugf("backup metadata started") if err = backup(m, blob, now); err == nil { + LastBackupTimeG.Set(float64(now.UnixNano()) / 1e9) logger.Infof("backup metadata succeed, used %s", time.Since(now)) } else { logger.Warnf("backup metadata failed: %s", err) } + LastBackupDurationG.Set(time.Since(now).Seconds()) } } }