Skip to content

Commit 0268210

Browse files
authored
ci: benchmark optimize (#19086)
1 parent 5fcffad commit 0268210

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

114 files changed

+527
-84
lines changed

.github/actions/benchmark_cloud/action.yml

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,8 @@ inputs:
4646
runs:
4747
using: "composite"
4848
steps:
49-
- name: Install script dependencies
50-
shell: bash
51-
run: |
52-
sudo apt-get update -yq
53-
sudo apt-get install -yq python3
54-
5549
- name: Prepare
56-
working-directory: benchmark/clickbench
50+
working-directory: benchmark
5751
shell: bash
5852
id: prepare
5953
env:
@@ -64,7 +58,7 @@ runs:
6458
else
6559
database="${{ inputs.database }}"
6660
if [[ -z "$database" ]]; then
67-
database="clickbench"
61+
database="benchmark"
6862
fi
6963
echo "database=$database" >> $GITHUB_OUTPUT
7064
fi
@@ -79,7 +73,7 @@ runs:
7973
echo "tries=$tries" >> $GITHUB_OUTPUT
8074
8175
- name: Run Benchmark
82-
working-directory: benchmark/clickbench
76+
working-directory: benchmark
8377
env:
8478
BENCHMARK_ID: ${{ inputs.run_id }}
8579
BENCHMARK_DATASET: ${{ inputs.dataset }}
@@ -104,8 +98,8 @@ runs:
10498
with:
10599
name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}-cache-${{ inputs.cache_size }}
106100
path: |
107-
benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}.json
108-
benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}-*.ndjson
101+
benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}.json
102+
benchmark/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-cache-${{ inputs.cache_size }}-*.ndjson
109103
110104
- name: Remove warehouse
111105
if: always()

.github/actions/benchmark_local/action.yml

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,8 @@ inputs:
2020
runs:
2121
using: "composite"
2222
steps:
23-
- name: Install script dependencies
24-
shell: bash
25-
run: |
26-
sudo apt-get update -yq
27-
sudo apt-get install -yq python3
28-
2923
- name: Run Benchmark
30-
working-directory: benchmark/clickbench
24+
working-directory: benchmark
3125
env:
3226
BENCHMARK_ID: ${{ inputs.run_id }}
3327
BENCHMARK_DATASET: ${{ inputs.dataset }}
@@ -42,7 +36,7 @@ runs:
4236
name: benchmark_local
4337

4438
- name: Prepare Metadata
45-
working-directory: benchmark/clickbench
39+
working-directory: benchmark
4640
shell: bash
4741
run: |
4842
case ${{ inputs.source }} in
@@ -63,4 +57,4 @@ runs:
6357
uses: actions/upload-artifact@v4
6458
with:
6559
name: benchmark-${{ inputs.dataset }}-local
66-
path: benchmark/clickbench/result-${{ inputs.dataset }}-local.json
60+
path: benchmark/result-${{ inputs.dataset }}-local.json

.github/workflows/reuse.benchmark.yml

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -189,17 +189,14 @@ jobs:
189189
runs-on: ubuntu-latest
190190
steps:
191191
- uses: actions/checkout@v4
192-
- name: Install Dependencies
193-
run: |
194-
sudo apt-get update -yq
195-
sudo apt-get install -yq python3-jinja2
192+
- uses: astral-sh/setup-uv@v5
196193
- uses: actions/download-artifact@v4
197194
with:
198-
path: benchmark/clickbench/results
195+
path: benchmark/results
199196
pattern: benchmark-*
200197
merge-multiple: true
201198
- name: Get Report Prefix
202-
working-directory: benchmark/clickbench
199+
working-directory: benchmark
203200
run: |
204201
shopt -s nullglob
205202
for result in results/*.json; do
@@ -220,16 +217,17 @@ jobs:
220217
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
221218
AWS_DEFAULT_REGION: auto
222219
AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
223-
working-directory: benchmark/clickbench
220+
working-directory: benchmark
224221
run: |
225222
echo -e "## ClickBench Report\n" > /tmp/body
226223
shopt -s nullglob
224+
uv sync
227225
for p in results/*; do
228226
[ -d "$p" ] || continue
229227
dataset=$(basename $p)
230228
aws s3 sync results/$dataset/ ${REPORT_S3_PREFIX}/ --include "*.json" --no-progress --checksum-algorithm=CRC32
231229
aws s3 sync "s3://benchmark/clickbench/release/${dataset}/latest/" ./results/${dataset}/ --exclude "*" --include "*.json" || true
232-
./update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
230+
uv run update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
233231
aws s3 cp ./results/${dataset}.html ${REPORT_S3_PREFIX}/${dataset}.html --no-progress --checksum-algorithm=CRC32
234232
echo "* **${dataset}**: https://benchmark.databend.com/clickbench/pr/${{ inputs.source_id }}/${{ inputs.run_id }}/${dataset}.html" >> /tmp/body
235233
done
@@ -254,19 +252,17 @@ jobs:
254252
# - "internal"
255253
steps:
256254
- uses: actions/checkout@v4
257-
- name: Install Dependencies
258-
run: |
259-
sudo apt-get update -yq
260-
sudo apt-get install -yq python3-jinja2
255+
- uses: astral-sh/setup-uv@v5
261256
- uses: actions/download-artifact@v4
262257
with:
263-
path: benchmark/clickbench/results
258+
path: benchmark/results
264259
pattern: benchmark-${{ matrix.dataset }}-*
265260
merge-multiple: true
266261
- name: Prepare results directory
267-
working-directory: benchmark/clickbench
262+
working-directory: benchmark
268263
run: |
269264
shopt -s nullglob
265+
uv sync
270266
for result in results/*.json; do
271267
dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
272268
mkdir -p results/${dataset}/
@@ -279,7 +275,7 @@ jobs:
279275
mv $ndjson ndjsons/${dataset}/$(basename $ndjson)
280276
done
281277
- name: Generate report and upload to R2
282-
working-directory: benchmark/clickbench
278+
working-directory: benchmark
283279
env:
284280
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
285281
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
@@ -288,7 +284,7 @@ jobs:
288284
run: |
289285
aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date --date='-1 month' -u +%Y)/$(date --date='-1 month' -u +%m)/ ./results/${{ matrix.dataset }}/
290286
aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/ ./results/${{ matrix.dataset }}/
291-
./update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
287+
uv run update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
292288
293289
RESULT_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/$(date -u +%Y-%m-%d)/${{ inputs.source_id }}"
294290
LATEST_PREFIX="s3://benchmark/clickbench/release/${{ matrix.dataset }}/latest/latest"
@@ -299,7 +295,7 @@ jobs:
299295
300296
aws s3 cp ./results/${{ matrix.dataset }}.html s3://benchmark/clickbench/release/${{ matrix.dataset }}.html --no-progress --checksum-algorithm=CRC32
301297
- name: Upload NDJSON archives to R2
302-
working-directory: benchmark/clickbench
298+
working-directory: benchmark
303299
env:
304300
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
305301
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
Lines changed: 4 additions & 5 deletions

benchmark/clickbench/benchmark_cloud.py renamed to benchmark/benchmark_cloud.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ class ResultRecord:
4444
version: str
4545
warehouse: str
4646
machine: str
47-
cluster_size: str
4847
tags: List[str]
4948
result: List[List[float]]
5049
values: Dict[str, List[float]]
@@ -103,7 +102,8 @@ def load_config() -> BenchmarkConfig:
103102
benchmark_id = os.environ.get("BENCHMARK_ID", str(int(time.time())))
104103
dataset = os.environ.get("BENCHMARK_DATASET", "hits")
105104
size = os.environ.get("BENCHMARK_SIZE", "Small")
106-
cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "0")
105+
raw_cache_size = os.environ.get("BENCHMARK_CACHE_SIZE", "")
106+
cache_size = raw_cache_size.strip() or "0"
107107
version = os.environ.get("BENCHMARK_VERSION", "")
108108
database = os.environ.get("BENCHMARK_DATABASE", "default")
109109
tries_raw = os.environ.get("BENCHMARK_TRIES", "3")
@@ -162,9 +162,9 @@ def ensure_dependencies() -> None:
162162
logger.info("bendsql version: %s", subprocess.check_output(["bendsql", "--version"]).decode().strip())
163163

164164

165-
SIZE_MAPPING: Dict[str, Dict[str, str]] = {
166-
"Small": {"cluster_size": "16", "machine": "Small"},
167-
"Large": {"cluster_size": "64", "machine": "Large"},
165+
SIZE_MAPPING: Dict[str, str] = {
166+
"Small": "Small",
167+
"Large": "Large",
168168
}
169169

170170

@@ -282,8 +282,7 @@ def main() -> None:
282282

283283
run_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
284284
tags = ["s3", f"cache-{config.cache_size}"]
285-
cluster_size = SIZE_MAPPING[config.size]["cluster_size"]
286-
machine = SIZE_MAPPING[config.size]["machine"]
285+
machine = SIZE_MAPPING[config.size]
287286
system: Optional[str] = None
288287
comment: Optional[str] = None
289288
if config.source and config.source_id:
@@ -307,7 +306,6 @@ def main() -> None:
307306
version=config.version,
308307
warehouse=config.warehouse,
309308
machine=machine,
310-
cluster_size=cluster_size,
311309
tags=tags,
312310
result=[],
313311
values={},

benchmark/clickbench/benchmark_local.sh renamed to benchmark/benchmark_local.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ echo '{}' >result.json
9090
yq -i ".date = \"$(date -u +%Y-%m-%d)\"" -o json result.json
9191
yq -i ".load_time = ${load_time} | .data_size = ${data_size} | .result = []" -o json result.json
9292
yq -i ".machine = \"${instance_type}\"" -o json result.json
93-
yq -i '.cluster_size = 1' -o json result.json
9493
yq -i '.tags = ["gp3"]' -o json result.json
9594

9695
echo "Running queries..."

benchmark/clickbench/benchmark_local_merge_into.sh renamed to benchmark/benchmark_local_merge_into.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ echo '{}' >result.json
7979
yq -i ".date = \"$(date -u +%Y-%m-%d)\"" -o json result.json
8080
yq -i ".load_time = ${load_time} | .data_size = ${data_size} | .result = []" -o json result.json
8181
yq -i ".machine = \"${format_instance_type}\"" -o json result.json
82-
yq -i '.cluster_size = 1' -o json result.json
8382
yq -i '.tags = ["gp3"]' -o json result.json
8483
yq -i ".system = \"${1}\"" -o json result.json
8584

File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)