16 changes: 5 additions & 11 deletions .github/actions/benchmark_cloud/action.yml
@@ -46,14 +46,8 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install script dependencies
-      shell: bash
-      run: |
-        sudo apt-get update -yq
-        sudo apt-get install -yq python3
-
     - name: Prepare
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       shell: bash
       id: prepare
       env:
@@ -64,7 +58,7 @@ runs:
         else
           database="${{ inputs.database }}"
           if [[ -z "$database" ]]; then
-            database="clickbench"
+            database="benchmark"
           fi
           echo "database=$database" >> $GITHUB_OUTPUT
         fi
@@ -79,7 +73,7 @@ runs:
         echo "tries=$tries" >> $GITHUB_OUTPUT

     - name: Run Benchmark
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       env:
         BENCHMARK_ID: ${{ inputs.run_id }}
         BENCHMARK_DATASET: ${{ inputs.dataset }}
@@ -104,8 +98,8 @@ runs:
       with:
         name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}-cache-${{ inputs.cache_size }}
         path: |
-          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}.json
-          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}-*.ndjson
+          benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}.json
+          benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}-*.ndjson

     - name: Remove warehouse
       if: always()
12 changes: 3 additions & 9 deletions .github/actions/benchmark_local/action.yml
@@ -20,14 +20,8 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install script dependencies
-      shell: bash
-      run: |
-        sudo apt-get update -yq
-        sudo apt-get install -yq python3
-
     - name: Run Benchmark
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       env:
         BENCHMARK_ID: ${{ inputs.run_id }}
         BENCHMARK_DATASET: ${{ inputs.dataset }}
@@ -42,7 +36,7 @@ runs:
         name: benchmark_local

     - name: Prepare Metadata
-      working-directory: benchmark/clickbench
+      working-directory: benchmark
       shell: bash
       run: |
         case ${{ inputs.source }} in
@@ -63,4 +57,4 @@ runs:
       uses: actions/upload-artifact@v4
       with:
         name: benchmark-${{ inputs.dataset }}-local
-        path: benchmark/clickbench/result-${{ inputs.dataset }}-local.json
+        path: benchmark/result-${{ inputs.dataset }}-local.json
30 changes: 13 additions & 17 deletions .github/workflows/reuse.benchmark.yml
@@ -189,17 +189,14 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: Install Dependencies
-        run: |
-          sudo apt-get update -yq
-          sudo apt-get install -yq python3-jinja2
+      - uses: astral-sh/setup-uv@v5
       - uses: actions/download-artifact@v4
         with:
-          path: benchmark/clickbench/results
+          path: benchmark/results
           pattern: benchmark-*
           merge-multiple: true
       - name: Get Report Prefix
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         run: |
           shopt -s nullglob
           for result in results/*.json; do
@@ -220,16 +217,17 @@ jobs:
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
           AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         run: |
           echo -e "## ClickBench Report\n" > /tmp/body
           shopt -s nullglob
+          uv sync
           for p in results/*; do
             [ -d "$p" ] || continue
             dataset=$(basename $p)
             aws s3 sync results/$dataset/ ${REPORT_S3_PREFIX}/ --include "*.json" --no-progress --checksum-algorithm=CRC32
             aws s3 sync "s3://benchmark/clickbench/release/${dataset}/latest/" ./results/${dataset}/ --exclude "*" --include "*.json" || true
-            ./update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
+            uv run update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
             aws s3 cp ./results/${dataset}.html ${REPORT_S3_PREFIX}/${dataset}.html --no-progress --checksum-algorithm=CRC32
             echo "* **${dataset}**: https://benchmark.databend.com/clickbench/pr/${{ inputs.source_id }}/${{ inputs.run_id }}/${dataset}.html" >> /tmp/body
           done
@@ -254,19 +252,17 @@ jobs:
           # - "internal"
     steps:
       - uses: actions/checkout@v4
-      - name: Install Dependencies
-        run: |
-          sudo apt-get update -yq
-          sudo apt-get install -yq python3-jinja2
+      - uses: astral-sh/setup-uv@v5
       - uses: actions/download-artifact@v4
         with:
-          path: benchmark/clickbench/results
+          path: benchmark/results
           pattern: benchmark-${{ matrix.dataset }}-*
           merge-multiple: true
       - name: Prepare results directory
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
        run: |
           shopt -s nullglob
+          uv sync
           for result in results/*.json; do
             dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
             mkdir -p results/${dataset}/
@@ -279,7 +275,7 @@ jobs:
             mv $ndjson ndjsons/${dataset}/$(basename $ndjson)
           done
       - name: Generate report and upload to R2
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
@@ -288,7 +284,7 @@ jobs:
         run: |
           aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date --date='-1 month' -u +%Y)/$(date --date='-1 month' -u +%m)/ ./results/${{ matrix.dataset }}/
           aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/ ./results/${{ matrix.dataset }}/
-          ./update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
+          uv run update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}

           RESULT_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/$(date -u +%Y-%m-%d)/${{ inputs.source_id }}"
           LATEST_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/latest/latest"
@@ -299,7 +295,7 @@ jobs:

           aws s3 cp ./results/${{ matrix.dataset }}.html s3://benchmark/clickbench/release/${{ matrix.dataset }}.html --no-progress --checksum-algorithm=CRC32
       - name: Upload NDJSON archives to R2
-        working-directory: benchmark/clickbench
+        working-directory: benchmark
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
9 changes: 4 additions & 5 deletions benchmark/clickbench/README.md → benchmark/README.md
@@ -1,24 +1,23 @@
 # Benchmark Directory

-This directory contains subdirectories dedicated to various performance tests,
+This directory contains subdirectories dedicated to various performance tests,

 specifically for TPCH tests, Hits tests, and internal query performance tests. Below is a brief overview of each subdirectory:

 ## 1. tpch

-This subdirectory includes performance evaluation tools and scripts related to TPCH tests.
+This subdirectory includes performance evaluation tools and scripts related to TPCH tests.

 TPCH tests are designed to simulate complex query scenarios to assess the system's performance when handling large datasets. In this directory, you can find testing scripts, configuration files, and documentation for test results.

 ## 2. hits

-Hits tests focus on specific queries or operations for performance testing.
+Hits tests focus on specific queries or operations for performance testing.

 In this subdirectory, you'll find scripts for Hits tests, sample queries, and performance analysis tools.

 ## 3. internal

-The internal subdirectory contains testing tools and scripts dedicated to ensuring the performance of internal queries.
+The internal subdirectory contains testing tools and scripts dedicated to ensuring the performance of internal queries.

 These tests may be conducted to ensure the system performs well when handling internal queries specific.
-
@@ -44,7 +44,6 @@ class ResultRecord:
     version: str
     warehouse: str
     machine: str
-    cluster_size: str
     tags: List[str]
     result: List[List[float]]
     values: Dict[str, List[float]]
@@ -103,7 +102,8 @@ def load_config() -> BenchmarkConfig:
     benchmark_id = os.environ.get("BENCHMARK_ID", str(int(time.time())))
     dataset = os.environ.get("BENCHMARK_DATASET", "hits")
     size = os.environ.get("BENCHMARK_SIZE", "Small")
-    cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "0")
+    raw_cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "")
+    cache_size = raw_cache_size.strip() or "0"
     version = os.environ.get("BENCHMARK_VERSION", "")
     database = os.environ.get("BENCHMARK_DATABASE", "default")
     tries_raw = os.environ.get("BENCHMARK_TRIES", "3")
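The two-step read above matters because `os.environ.get()` only falls back to its default when the variable is unset, not when CI exports it as an empty string. A minimal sketch of the difference (the exported value is hypothetical):

```python
import os

# Simulate CI exporting the variable as an empty string (hypothetical value).
os.environ["BENCHMARK_CACHE_SIZE"] = ""

# Old behavior: the "0" default is skipped because the variable *is* set.
old_cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "0")   # -> ""

# New behavior: blank or whitespace-only values normalize to "0".
raw_cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "")
new_cache_size = raw_cache_size.strip() or "0"                 # -> "0"

print(repr(old_cache_size), repr(new_cache_size))              # '' '0'
```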
@@ -162,9 +162,9 @@ def ensure_dependencies() -> None:
     logger.info("bendsql version: %s", subprocess.check_output(["bendsql", "--version"]).decode().strip())


-SIZE_MAPPING: Dict[str, Dict[str, str]] = {
-    "Small": {"cluster_size": "16", "machine": "Small"},
-    "Large": {"cluster_size": "64", "machine": "Large"},
+SIZE_MAPPING: Dict[str, str] = {
+    "Small": "Small",
+    "Large": "Large",
 }

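With `cluster_size` gone, the nested per-size dict collapses into a plain string mapping. A small before/after sketch of the lookup (shapes and values copied from this diff):

```python
from typing import Dict

# Old shape: size name -> {"cluster_size": ..., "machine": ...}
OLD_SIZE_MAPPING: Dict[str, Dict[str, str]] = {
    "Small": {"cluster_size": "16", "machine": "Small"},
    "Large": {"cluster_size": "64", "machine": "Large"},
}

# New shape: size name -> machine label.
SIZE_MAPPING: Dict[str, str] = {"Small": "Small", "Large": "Large"}

assert OLD_SIZE_MAPPING["Small"]["machine"] == SIZE_MAPPING["Small"]
# Unknown sizes raise KeyError under either shape.
```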
@@ -282,8 +282,7 @@ def main() -> None:

     run_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
     tags = ["s3", f"cache-{config.cache_size}"]
-    cluster_size = SIZE_MAPPING[config.size]["cluster_size"]
-    machine = SIZE_MAPPING[config.size]["machine"]
+    machine = SIZE_MAPPING[config.size]
     system: Optional[str] = None
     comment: Optional[str] = None
     if config.source and config.source_id:
@@ -307,7 +306,6 @@ def main() -> None:
         version=config.version,
         warehouse=config.warehouse,
         machine=machine,
-        cluster_size=cluster_size,
         tags=tags,
         result=[],
         values={},
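Taken together, these two hunks mean the record is now built without a `cluster_size` argument. A trimmed stand-in for `ResultRecord`, using only the fields visible in this diff (the real dataclass has more; sample values are hypothetical):

```python
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class ResultRecord:  # trimmed stand-in; not the full class
    version: str
    warehouse: str
    machine: str
    tags: List[str]
    result: List[List[float]]
    values: Dict[str, List[float]]

record = ResultRecord(
    version="v1.2.699",       # hypothetical
    warehouse="bench-small",  # hypothetical
    machine="Small",          # SIZE_MAPPING["Small"]
    tags=["s3", "cache-0"],
    result=[],
    values={},
)
```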
@@ -90,7 +90,6 @@ echo '{}' >result.json
 yq -i ".date = \"$(date -u +%Y-%m-%d)\"" -o json result.json
 yq -i ".load_time = ${load_time} | .data_size = ${data_size} | .result = []" -o json result.json
 yq -i ".machine = \"${instance_type}\"" -o json result.json
-yq -i '.cluster_size = 1' -o json result.json
 yq -i '.tags = ["gp3"]' -o json result.json

 echo "Running queries..."
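After this edit, the record these `yq` calls assemble in result.json simply omits `cluster_size`. Roughly the resulting shape (all values are hypothetical placeholders):

```python
import json

record = {
    "date": "2025-01-01",     # hypothetical
    "load_time": 12.3,        # hypothetical
    "data_size": 456789,      # hypothetical
    "result": [],
    "machine": "c5.4xlarge",  # hypothetical instance type
    "tags": ["gp3"],
}
print(json.dumps(record, indent=2))
```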
@@ -79,7 +79,6 @@ echo '{}' >result.json
 yq -i ".date = \"$(date -u +%Y-%m-%d)\"" -o json result.json
 yq -i ".load_time = ${load_time} | .data_size = ${data_size} | .result = []" -o json result.json
 yq -i ".machine = \"${format_instance_type}\"" -o json result.json
-yq -i '.cluster_size = 1' -o json result.json
 yq -i '.tags = ["gp3"]' -o json result.json
 yq -i ".system = \"${1}\"" -o json result.json

47 files renamed without changes.