Skip to content

GPU benchmark regression test #450

GPU benchmark regression test

GPU benchmark regression test #450

Workflow file for this run

# Run regression check only when attempting to merge, shown as skipped status check beforehand
name: GPU benchmark regression test
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
branches: [dev]
merge_group:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
# Run comparative benchmark against dev, open issue on regression
gpu-benchmark:
if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
name: Run benchmarks on GPU
runs-on: [self-hosted, gpu-bench]
steps:
- uses: actions/checkout@v4
with:
repository: lurk-lab/ci-workflows
- uses: ./.github/actions/gpu-setup
with:
gpu-framework: 'cuda'
- uses: ./.github/actions/ci-env
- uses: actions/checkout@v4
# Install dependencies
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: [email protected]
- name: Install criterion
run: |
cargo install cargo-criterion
cargo install criterion-table
- name: Set bench output format and base SHA
run: |
echo "BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
echo "BENCH_NUM_CONS=16384,1038732" | tee -a $GITHUB_ENV
echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
# Checkout base branch for comparative bench
- uses: actions/checkout@v4
with:
ref: dev
path: dev
# Copy the script so the base can bench with the same parameters
- name: Run GPU bench on base branch
run: |
# Copy justfile to dev, overwriting existing config with that of PR branch
cp ../benches/justfile .
# Run benchmark
just gpu-bench-ci supernova-ci
# Copy bench output to PR branch
cp supernova-ci-${{ env.BASE_COMMIT }}.json ..
working-directory: ${{ github.workspace }}/dev
- name: Run GPU bench on PR branch
run: |
just gpu-bench-ci supernova-ci
cp supernova-ci-${{ github.sha }}.json ..
working-directory: ${{ github.workspace }}/benches
- name: copy the benchmark template and prepare it with data
run: |
cp .github/tables.toml .
# Get CPU model
CPU_MODEL=$(grep '^model name' /proc/cpuinfo | head -1 | awk -F ': ' '{ print $2 }')
# Get vCPU count
NUM_VCPUS=$(nproc --all)
# Get total RAM in GB
TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Use conditionals to ensure that only non-empty variables are inserted
[[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
[[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
[[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS vCPUs" tables.toml
[[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml
sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml
echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
working-directory: ${{ github.workspace }}
# Create a `criterion-table` and write in commit comment
- name: Run `criterion-table`
run: |
cat supernova-ci-${{ env.BASE_COMMIT }}.json supernova-ci-${{ github.sha }}.json | criterion-table > BENCHMARKS.md
- name: Write bench on commit comment
uses: peter-evans/commit-comment@v3
with:
body-path: BENCHMARKS.md
# TODO: Set `$BENCH_NOISE_THRESHOLD` via `cardinalby/export-env-action` or hardcode to 1.3
# Check for a slowdown >= `$BENCH_NOISE_THRESHOLD` (fallback is 30%/1.3x). If so, open an issue but don't block merge
# Since we are parsing for slowdowns, we simply add 1 to the noise threshold decimal to get the regression factor
- name: Check for perf regression
id: regression-check
run: |
REGRESSIONS=$(grep -o '[0-9.]*x slower' BENCHMARKS.md | cut -d 'x' -f1)
echo $REGRESSIONS
if [ ! -z "${{ env.BENCH_NOISE_THRESHOLD}}" ]; then
REGRESSION_FACTOR=$(echo "${{ env.BENCH_NOISE_THRESHOLD }}+1" | bc)
else
REGRESSION_FACTOR=1.3
fi
echo "NOISE_THRESHOLD=$(echo "($REGRESSION_FACTOR-1)*100" | bc)" | tee -a $GITHUB_ENV
for r in $REGRESSIONS
do
if (( $(echo "$r >= $REGRESSION_FACTOR" | bc -l) ))
then
echo "regression=true" | tee -a $GITHUB_OUTPUT
fi
done
# Not possible to use ${{ github.event.number }} with the `merge_group` trigger
- name: Get PR number from merge branch
if: steps.regression-check.outputs.regression == 'true'
run: |
echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
- uses: JasonEtco/create-an-issue@v2
if: steps.regression-check.outputs.regression == 'true'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ env.PR_NUMBER }}
GIT_SHA: ${{ github.sha }}
WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
NOISE_THRESHOLD: ${{ env.NOISE_THRESHOLD }}
with:
update_existing: true
filename: .github/PERF_REGRESSION.md