Skip to content

Commit

Permalink
CI: refactor and add dump test with subdir (#3863)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhoucheng361 authored Jul 5, 2023
1 parent 3ed4b50 commit e2fa798
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 103 deletions.
114 changes: 114 additions & 0 deletions .github/scripts/command/load_dump_bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
# Load/dump/list benchmark tests for JuiceFS metadata engines.
#
# Command-line arguments are forwarded to run_test at the bottom of this file.
#
# Environment:
#   META        metadata engine name handed to get_meta_url and
#               start_meta_engine (default: sqlite3)
#   START_META  start the metadata engine before running when set to
#               "true" (default: true)

# Enable errexit here instead of on the shebang line: shebang options are
# dropped when the script is launched as `bash load_dump_bench.sh`.
set -e

source .github/scripts/common/common.sh

META=${META:-sqlite3}
START_META=${START_META:-true}
source .github/scripts/start_meta_engine.sh
META_URL=$(get_meta_url "$META")
if [[ "$START_META" == true ]]; then
  start_meta_engine "$META"
fi

#######################################
# Time `juicefs load` and `juicefs dump` on the 2M_emtpy_files dump, report
# both durations through db.py, then mount the volume on /jfs and verify the
# used-inode count shown by `df -i`.
# Globals:   META_URL (read), META (read)
# Arguments: none
# Outputs:   progress and timing lines on stdout
# Exits:     1 when the inode count is missing or is not 2233313
#######################################
test_load_dump_with_small_dir() {
  local load_file start end runtime version inode

  prepare_test
  create_database "$META_URL"
  echo "meta_url is: $META_URL"

  wget -q https://s.juicefs.com/static/bench/2M_emtpy_files.dump.gz
  gzip -dfk 2M_emtpy_files.dump.gz
  load_file=2M_emtpy_files.dump

  start=$(date +%s)
  ./juicefs load "$META_URL" "$load_file"
  end=$(date +%s)
  runtime=$((end - start))
  version=$(./juicefs -V | cut -b 17- | sed 's/:/-/g')
  python3 .github/scripts/db.py --name load_small_dir --result "$runtime" \
    --version "$version" --meta "$META" --storage file
  echo "load cost $runtime seconds"

  start=$(date +%s)
  ./juicefs dump "$META_URL" dump.json
  end=$(date +%s)
  runtime=$((end - start))
  echo "dump cost $runtime seconds"
  python3 .github/scripts/db.py --name dump_small_dir --result "$runtime" \
    --version "$version" --meta "$META" --storage file

  ./juicefs mount "$META_URL" /jfs -d --no-usage-report
  # Third column of the JuiceFS row in `df -i` output.
  inode=$(df -i /jfs | awk '/JuiceFS/ {print $3}')
  # Compare as strings: with `[ "$inode" -ne N ]` an empty value makes the
  # test itself error out (status 2), which `if` reads as "false" and the
  # check would pass without ever seeing an inode count.
  if [[ "$inode" != "2233313" ]]; then
    echo "<FATAL>: inode error: $inode"
    exit 1
  fi
}

# Big-directory load/dump benchmark with the subdir option switched on.
test_load_dump_with_big_dir_subdir() {
  do_load_dump_with_big_dir "true"
}

# Big-directory load/dump benchmark with the subdir option switched off.
test_load_dump_with_big_dir() {
  do_load_dump_with_big_dir "false"
}

#######################################
# Time `juicefs load` and `juicefs dump` on the 1M_files_in_one_dir dump,
# report both durations through db.py, then mount the volume on /jfs and
# verify the used-inode count shown by `df -i`.
# Globals:   META_URL (read), META (read)
# Arguments: $1 - "true" to dump with `--subdir test`; any other value
#                 dumps the whole volume
# Outputs:   progress, timing and `df -i` output on stdout
# Exits:     1 when the inode count is missing or is not 1000003
#######################################
do_load_dump_with_big_dir() {
  local with_subdir=$1
  local load_file start end runtime version df_output inode
  local -a dump_args

  prepare_test
  create_database "$META_URL"
  echo "meta_url is: $META_URL"

  wget -q https://s.juicefs.com/static/bench/1M_files_in_one_dir.dump.gz
  gzip -dfk 1M_files_in_one_dir.dump.gz
  load_file=1M_files_in_one_dir.dump

  start=$(date +%s)
  ./juicefs load "$META_URL" "$load_file"
  end=$(date +%s)
  runtime=$((end - start))
  echo "load cost $runtime seconds"
  version=$(./juicefs -V | cut -b 17- | sed 's/:/-/g')
  python3 .github/scripts/db.py --name load_big_dir --result "$runtime" \
    --version "$version" --meta "$META" --storage file

  # Build the dump command line once; only the subdir flag differs.
  dump_args=("$META_URL" dump.json)
  if [[ "$with_subdir" == true ]]; then
    dump_args+=(--subdir test)
  fi
  start=$(date +%s)
  ./juicefs dump "${dump_args[@]}"
  end=$(date +%s)
  runtime=$((end - start))
  echo "dump cost $runtime seconds"
  # NOTE(review): the subdir variant is recorded under the same
  # "dump_big_dir" name as the full dump - confirm that is intended.
  python3 .github/scripts/db.py --name dump_big_dir --result "$runtime" \
    --version "$version" --meta "$META" --storage file

  ./juicefs mount "$META_URL" /jfs -d --no-usage-report
  # Query df once: show the full table for the log, then pick the third
  # column of the JuiceFS row.
  df_output=$(df -i /jfs)
  echo "$df_output"
  inode=$(awk '/JuiceFS/ {print $3}' <<<"$df_output")
  echo "inode: $inode"
  # Compare as strings: with `[ "$inode" -ne N ]` an empty value makes the
  # test itself error out (status 2), which `if` reads as "false" and the
  # check would pass without ever seeing an inode count.
  if [[ "$inode" != "1000003" ]]; then
    echo "<FATAL>: inode error: $inode"
    exit 1
  fi
}

#######################################
# Time a long listing of the big directory under /jfs, report the duration
# through db.py and verify the number of lines `ls -l` produced.
# The function itself does not mount anything; /jfs must already hold the
# volume when it is called.
# Globals:   META (read)
# Arguments: none
# Outputs:   line count and timing on stdout
# Exits:     1 when the line count is not 1000001
#######################################
test_list_with_big_dir() {
  local start end runtime file_count version

  start=$(date +%s)
  # The expected value below is a count of `ls -l` output lines, so the
  # listing is deliberately counted with wc rather than globbed.
  file_count=$(ls -l /jfs/test/test-dir.0-0/mdtest_tree.0/ | wc -l)
  echo "file_count: $file_count"
  end=$(date +%s)
  runtime=$((end - start))
  echo "list cost $runtime seconds"

  version=$(./juicefs -V | cut -b 17- | sed 's/:/-/g')
  # Quoted so an empty version string stays its own argument instead of
  # letting --version consume the following --meta flag.
  python3 .github/scripts/db.py --name list_big_dir --result "$runtime" \
    --version "$version" --meta "$META" --storage file

  if ((file_count != 1000001)); then
    echo "<FATAL>: file_count error: $file_count"
    exit 1
  fi
}

#######################################
# Reset state before a test: unmount /jfs, make sure the mount is really
# gone, and destroy the volume recorded in the metadata engine if one
# exists.
# Globals:   META_URL (read), UUID (written)
# Arguments: none
# Outputs:   `juicefs status` output, or "meta not exist", on stdout
# Exits:     1 when /jfs/.config is still visible after unmounting
#######################################
prepare_test() {
  local status_output

  # Start from a clean value so a UUID inherited from the environment or
  # left by an earlier call can never be passed to `destroy` when the
  # status query below fails. UUID stays a global, as before.
  UUID=""

  umount_jfs /jfs "$META_URL"
  if ls -l /jfs/.config; then
    echo "<FATAL>: /jfs/.config still exists after umount" >&2
    exit 1
  fi

  # Query the status once and reuse the output for both the log and the
  # UUID extraction.
  if status_output=$(./juicefs status "$META_URL"); then
    printf '%s\n' "$status_output"
    UUID=$(printf '%s\n' "$status_output" | grep UUID | cut -d '"' -f 4)
  else
    echo "meta not exist"
  fi

  if [[ -n "$UUID" ]]; then
    ./juicefs destroy --yes "$META_URL" "$UUID"
  fi
  # python3 .github/scripts/flush_meta.py $META_URL
  # rm -rf /var/jfs/myjfs || true
  # rm -rf /var/jfsCache/myjfs || true
}

# Load the shared runner and forward the command-line arguments to it.
# "$@" is quoted so each argument reaches run_test exactly as it was given,
# without word splitting or glob expansion.
source .github/scripts/common/run_test.sh && run_test "$@"


7 changes: 7 additions & 0 deletions .github/scripts/db.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@

import subprocess

try:
__import__("MySQLdb")
except ImportError:
subprocess.check_call(["pip", "install", "mysqlclient"])

import os
from sys import argv
import MySQLdb
Expand Down
5 changes: 5 additions & 0 deletions .github/scripts/testSync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
subprocess.check_call(["pip", "install", "hypothesis"])
import random
import shutil
try:
__import__("hypothesis")
except ImportError:
subprocess.check_call(["pip", "install", "hypothesis"])

from hypothesis import given, strategies as st, settings, example
import os

Expand Down
5 changes: 5 additions & 0 deletions .github/scripts/testVersionCompatible.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
from hypothesis import Phase, assume, strategies as st
from hypothesis import seed
from packaging import version
import subprocess
try:
__import__("minio")
except ImportError:
subprocess.check_call(["pip", "install", "minio"])
from minio import Minio
import uuid
from utils import *
Expand Down
152 changes: 49 additions & 103 deletions .github/workflows/load.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ name: "load-test"
on:
push:
branches:
- 'main'
- 'release-**'
paths-ignore:
- 'docs/**'
- '**.md'
pull_request:
branches:
- 'main'
- 'release-**'
paths-ignore:
- 'docs/**'
Expand All @@ -17,15 +25,44 @@ on:
default: false

jobs:
build-matrix:
runs-on: ubuntu-20.04
steps:
- id: set-matrix
run: |
echo "github.event_name is ${{github.event_name}}"
echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
if [ "${{github.event_name}}" == "schedule" ] || [ "${{github.event_name}}" == "workflow_dispatch" ]; then
echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
else
echo 'meta_matrix=["redis", "mysql", "tikv"]' >> $GITHUB_OUTPUT
# echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
fi
outputs:
meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

load:
needs: [build-matrix]
strategy:
fail-fast: false
matrix:
meta: [ 'sqlite3', 'redis', 'mysql', 'tikv', 'tidb', 'postgres', 'mariadb', 'badger', 'fdb']
# meta: ['redis']
meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}

runs-on: ubuntu-20.04

steps:
- name: Remove unused software
if: false
shell: bash
run: |
echo "before remove unused software"
sudo df -h
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
echo "after remove unused software"
sudo df -h
- name: Checkout
uses: actions/checkout@v3
with:
Expand All @@ -45,118 +82,26 @@ jobs:
with:
target: ${{steps.vars.outputs.target}}

- name: Prepare meta db
run: |
chmod +x .github/scripts/start_meta_engine.sh
source .github/scripts/start_meta_engine.sh
start_meta_engine ${{matrix.meta}}
- name: Install tools
run: |
sudo pip install mysqlclient
- name: Load and dump with small directory
timeout-minutes: 30
run: |
source .github/scripts/start_meta_engine.sh
meta_url=$(get_meta_url ${{matrix.meta}})
create_database $meta_url
echo meta_url is: $meta_url
mount_point=/tmp/juicefs-load-test
wget -q https://s.juicefs.com/static/bench/2M_emtpy_files.dump.gz
gzip -dk 2M_emtpy_files.dump.gz
load_file=2M_emtpy_files.dump
start=`date +%s`
./juicefs load $meta_url $load_file
end=`date +%s`
runtime=$((end-start))
export MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}}
sudo chmod +x .github/scripts/db.py
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
python3 .github/scripts/db.py --name load_small_dir --result $runtime --version $version --meta ${{matrix.meta}} --storage file
echo "load cost $runtime seconds"
start=`date +%s`
./juicefs dump $meta_url dump.json
end=`date +%s`
runtime=$((end-start))
echo "dump cost $runtime seconds"
python3 .github/scripts/db.py --name dump_small_dir --result $runtime --version $version --meta ${{matrix.meta}} --storage file
sudo mkdir /var/jfs
sudo chmod 777 /var/jfs
./juicefs mount $meta_url $mount_point -d --no-usage-report
inode=$(df -i $mount_point | grep JuiceFS |awk -F" " '{print $3}')
if [ "$inode" -ne "2233313" ]; then
echo "<FATAL>: inode error: $inode"
exit 1
fi
- name: Clear
run: |
source .github/scripts/start_meta_engine.sh
meta_url=$(get_meta_url ${{matrix.meta}})
mp=/tmp/juicefs-load-test
volume=jfs
test -d $mp && ./juicefs umount -f $mp
./juicefs status $meta_url && UUID=$(./juicefs status $meta_url | grep UUID | cut -d '"' -f 4) || echo "meta not exist"
if [ -n "$UUID" ];then
./juicefs destroy --yes $meta_url $UUID
fi
test -d /var/jfs/$volume && rm -rf /var/jfs/$volume || true
shell: bash
sudo MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} META=${{matrix.meta}} START_META=true .github/scripts/command/load_dump_bench.sh test_load_dump_with_small_dir
- name: Load and dump with big directory
timeout-minutes: 30
run: |
source .github/scripts/start_meta_engine.sh
meta_url=$(get_meta_url ${{matrix.meta}})
create_database $meta_url
echo meta_url is: $meta_url
mount_point=/tmp/juicefs-load-test
wget -q https://s.juicefs.com/static/bench/1M_files_in_one_dir.dump.gz
gzip -dk 1M_files_in_one_dir.dump.gz
load_file=1M_files_in_one_dir.dump
start=`date +%s`
./juicefs load $meta_url $load_file
end=`date +%s`
runtime=$((end-start))
echo "load cost $runtime seconds"
export MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}}
sudo chmod +x .github/scripts/db.py
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
python3 .github/scripts/db.py --name load_big_dir --result $runtime --version $version --meta ${{matrix.meta}} --storage file
start=`date +%s`
./juicefs dump $meta_url dump.json
end=`date +%s`
runtime=$((end-start))
echo "dump cost $runtime seconds"
python3 .github/scripts/db.py --name dump_big_dir --result $runtime --version $version --meta ${{matrix.meta}} --storage file
sudo chmod 777 /var/jfs
./juicefs mount $meta_url $mount_point -d --no-usage-report
df -i $mount_point
inode=$(df -i $mount_point | grep JuiceFS |awk -F" " '{print $3}')
echo "inode: $inode"
if [ "$inode" -ne "1000003" ]; then
echo "<FATAL>: inode error: $inode"
exit 1
fi
sudo MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} META=${{matrix.meta}} START_META=false .github/scripts/command/load_dump_bench.sh test_load_dump_with_big_dir
- name: Load and dump subdir with big directory
if: false
timeout-minutes: 30
run: |
sudo MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} META=${{matrix.meta}} START_META=false .github/scripts/command/load_dump_bench.sh test_load_dump_with_big_dir_subdir
- name: List big directory
timeout-minutes: 30
run: |
mount_point=/tmp/juicefs-load-test
start=`date +%s`
file_count=$(ls -l $mount_point/test/test-dir.0-0/mdtest_tree.0/ | wc -l)
echo "file_count: $file_count"
end=`date +%s`
runtime=$((end-start))
echo "list cost $runtime seconds"
export MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}}
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
python3 .github/scripts/db.py --name list_big_dir --result $runtime --version $version --meta ${{matrix.meta}} --storage file
if [ "$file_count" -ne "1000001" ]; then
echo "<FATAL>: file_count error: $file_count"
exit 1
fi
sudo MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} META=${{matrix.meta}} START_META=false .github/scripts/command/load_dump_bench.sh test_list_with_big_dir
- name: log
if: ${{ always() }}
Expand All @@ -169,6 +114,7 @@ jobs:
- name: Setup upterm session
if: ${{ failure() && github.event_name == 'workflow_dispatch' && github.event.inputs.debug == 'true' }}
# if: failure()
timeout-minutes: 60
uses: lhotari/action-upterm@v1

Expand Down

0 comments on commit e2fa798

Please sign in to comment.