Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Manually dispatched workflow: builds prebuilds for the requested repository/ref,
# runs the desktop RTF benchmark matrix through the integration-test workflow, and
# publishes a single consolidated performance report.
name: Benchmark Performance (Whispercpp)

on:
  workflow_dispatch:
    inputs:
      repository:
        description: "Repository to benchmark"
        required: false
        type: string
      ref:
        description: "Git ref (branch/tag/SHA) to benchmark"
        required: false
        type: string
      include_desktop:
        description: "Run desktop benchmark matrix"
        required: false
        type: boolean
        default: true

# Workflow-wide defaults; individual jobs below restate or widen them.
permissions:
  contents: read
  packages: read
  id-token: write

jobs:
  # Resolves the dispatch inputs once so every later job reads the same values.
  context:
    runs-on: ubuntu-latest
    outputs:
      repository: ${{ steps.ctx.outputs.repository }}
      ref: ${{ steps.ctx.outputs.ref }}
      include_desktop: ${{ steps.ctx.outputs.include_desktop }}
    steps:
      - id: ctx
        shell: bash
        # Inputs are passed through env instead of being interpolated into the script.
        env:
          INPUT_REPO: ${{ inputs.repository }}
          INPUT_REF: ${{ inputs.ref }}
          INPUT_INCLUDE_DESKTOP: ${{ inputs.include_desktop }}
          REPO: ${{ github.repository }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          # Empty inputs fall back to the repository / ref the workflow was dispatched on.
          repo="${INPUT_REPO:-$REPO}"
          ref="${INPUT_REF:-$REF_NAME}"
          echo "repository=$repo" >> "$GITHUB_OUTPUT"
          echo "ref=$ref" >> "$GITHUB_OUTPUT"
          echo "include_desktop=${INPUT_INCLUDE_DESKTOP}" >> "$GITHUB_OUTPUT"

  # Calls the reusable prebuilds workflow for the resolved repository/ref.
  prebuild:
    needs: context
    # The called workflow is granted write scopes that the workflow default does not give.
    permissions:
      contents: write
      packages: write
      pull-requests: write
      id-token: write
    uses: ./.github/workflows/prebuilds-qvac-lib-infer-whispercpp.yml
    secrets: inherit
    with:
      repository: ${{ needs.context.outputs.repository }}
      ref: ${{ needs.context.outputs.ref }}

  # Runs the integration-test workflow in benchmark-only mode.
  desktop-benchmarks:
    needs: [context, prebuild]
    # Job outputs are strings, hence the comparison against 'false'; any other value
    # (including an empty one) keeps the desktop matrix enabled.
    if: needs.context.outputs.include_desktop != 'false'
    permissions:
      contents: read
      packages: read
      id-token: write
    uses: ./.github/workflows/integration-test-qvac-lib-infer-whispercpp.yml
    secrets: inherit
    with:
      repository: ${{ needs.context.outputs.repository }}
      ref: ${{ needs.context.outputs.ref }}
      run_integration_tests: false
      run_rtf_benchmarks: true

  # Aggregates whatever benchmark results exist into one report.
  summarize:
    needs: [context, desktop-benchmarks]
    # always(): still runs when the benchmark job was skipped or failed.
    if: always()
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
        with:
          repository: ${{ needs.context.outputs.repository }}
          ref: ${{ needs.context.outputs.ref }}
          token: ${{ secrets.PAT_TOKEN }}

      - name: Download desktop benchmark artifacts
        if: needs.context.outputs.include_desktop != 'false'
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
        with:
          pattern: rtf-results-*
          path: benchmark-artifacts/desktop
          # NOTE(review): all runner artifacts are merged into one directory, so result
          # file names need to be unique across runners - confirm against the producer.
          merge-multiple: true

      - name: Generate consolidated benchmark report
        run: |
          # The download step is skipped when desktop benchmarks are disabled, so make
          # sure the directory used by --dir and by the --output paths exists.
          mkdir -p benchmark-artifacts/desktop
          node scripts/perf-report/aggregate-whisper-rtf.js \
            --dir benchmark-artifacts/desktop \
            --manual-dir packages/qvac-lib-infer-whispercpp/benchmarks/manual-results \
            --output benchmark-artifacts/whisper-performance-findings.md \
            --output-json benchmark-artifacts/whisper-performance-findings.json

      - name: Add summary
        run: cat benchmark-artifacts/whisper-performance-findings.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload consolidated benchmark report
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
        with:
          name: whisper-performance-findings
          path: |
            benchmark-artifacts/whisper-performance-findings.md
            benchmark-artifacts/whisper-performance-findings.json
          retention-days: 30
49 changes: 44 additions & 5 deletions .github/workflows/benchmark-qvac-lib-infer-whispercpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ name: Benchmark (Whispercpp)
on:
workflow_dispatch:
inputs:
# Optional repository override; the checkout step falls back to github.repository
# when this is left empty.
repository:
  description: 'Repository to benchmark'
  required: false
  type: string
# Optional git ref override; consumers fall back to the dispatching ref when empty.
ref:
  description: 'Git ref (branch/tag/SHA) to benchmark'
  required: false
  type: string
dataset_type:
description: 'Dataset type'
required: true
Expand Down Expand Up @@ -292,6 +300,9 @@ jobs:
# Checks out the requested repository/ref; empty dispatch inputs fall back to the
# repository and ref the workflow itself is running on.
- name: Checkout repository
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
  with:
    repository: ${{ github.event.inputs.repository || github.repository }}
    ref: ${{ github.event.inputs.ref || github.ref }}
    token: ${{ secrets.PAT_TOKEN }}
    submodules: recursive

- name: Configure scoped registry for @tetherto and @qvac packages
Expand Down Expand Up @@ -344,6 +355,25 @@ jobs:
npm install -g bare-dev
npm install -g bare bare-make

# Best-effort download of the `prebuilds` artifact from the latest successful
# prebuilds run. continue-on-error keeps the job going on failure; later steps
# branch on steps.prebuilds.outcome to choose local prebuilds or the npm package.
- name: Download prebuilds
  id: prebuilds
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.PAT_TOKEN }}
    BENCHMARK_REPOSITORY: ${{ github.event.inputs.repository || github.repository }}
    BENCHMARK_REF: ${{ github.event.inputs.ref || github.ref_name }}
  run: |
    echo "Fetching prebuilds artifact from latest successful prebuilds run..."
    # NOTE(review): --branch matches branch names only; a tag or SHA passed as `ref`
    # will find no run and take the fallback path below.
    # `// empty` makes the no-run case produce an empty string for the -z check.
    RUN_ID=$(gh run list --repo "$BENCHMARK_REPOSITORY" --workflow=prebuilds-qvac-lib-infer-whispercpp.yml --branch "$BENCHMARK_REF" --status=success --limit 1 --json databaseId --jq '.[0].databaseId // empty')
    if [ -z "$RUN_ID" ]; then
      echo "::warning::No successful prebuilds run found - will fall back to published npm package"
      # Non-zero exit marks the step outcome as failure, which downstream steps check.
      exit 1
    fi

    echo "Using prebuilds from run ID: $RUN_ID"
    gh run download "$RUN_ID" --repo "$BENCHMARK_REPOSITORY" -n prebuilds -D "${{ env.WORKDIR }}/prebuilds"
    ls -la "${{ env.WORKDIR }}/prebuilds/" || true

- name: Clone custom model repository
if: github.event.inputs.custom_model_repo != ''
working-directory: ${{ env.WORKDIR }}
Expand Down Expand Up @@ -536,6 +566,7 @@ jobs:
esac
echo "Using default max_samples for $MODEL_SIZE: $MAX_SAMPLES"
fi
echo "MAX_SAMPLES=$MAX_SAMPLES" >> $GITHUB_ENV

# Set timeout based on model size
case "$MODEL_SIZE" in
Expand Down Expand Up @@ -604,9 +635,21 @@ jobs:
echo "=== Updated ${CONFIG_FILE} ==="
cat "$CONFIG_PATH"

# Runs only when the prebuilds download succeeded, i.e. when the addon will be
# installed from the local checkout rather than from npm.
- name: Install main package dependencies
  if: steps.prebuilds.outcome == 'success'
  working-directory: ${{ env.WORKDIR }}
  run: npm install

# Installs the benchmark server dependencies and, when local prebuilds were
# downloaded, replaces the published addon with the one from the local checkout.
# The step has a single `run` key; a mapping must not define `run` twice.
- name: Install benchmark server dependencies
  working-directory: ${{ env.WORKDIR }}/benchmarks/server
  run: |
    npm install
    if [ "${{ steps.prebuilds.outcome }}" = "success" ]; then
      echo "Installing addon from local source (prebuilds available)..."
      # ../../ resolves to WORKDIR relative to benchmarks/server.
      npm install ../../
    else
      echo "Using published addon dependency from npm..."
    fi

- name: Install benchmark client dependencies
working-directory: ${{ env.WORKDIR }}/benchmarks/client
Expand Down Expand Up @@ -675,10 +718,6 @@ jobs:
VAD_SUFFIX="no_vad"
STREAMING_SUFFIX="batch"

if [ "${{ github.event.inputs.vad_enabled }}" = "true" ]; then
VAD_SUFFIX="vad"
fi

if [ "${{ github.event.inputs.streaming_mode }}" = "true" ]; then
STREAMING_SUFFIX="streaming"
fi
Expand Down
117 changes: 115 additions & 2 deletions .github/workflows/integration-test-qvac-lib-infer-whispercpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ on:
description: "NPM package containing prebuilds (e.g. @qvac/transcription-whispercpp@1.0.0)"
required: true
type: string
# Gates the "Run integration test" steps; benchmark-only callers set it to false.
run_integration_tests:
  description: "Run the regular integration suite"
  required: false
  type: boolean
  default: true
# Gates the RTF benchmark, upload, and summary steps.
run_rtf_benchmarks:
  description: "Run RTF performance benchmarks"
  required: false
  type: boolean
  default: true
# When non-empty, takes precedence over the per-runner matrix.benchmark_matrix_json.
benchmark_matrix_json:
  description: "Optional JSON array overriding the per-runner benchmark matrix"
  required: false
  type: string
workdir:
description: "Relative path to package directory in monorepo"
type: string
Expand All @@ -19,6 +33,20 @@ on:
type: string
repository:
type: string
# Gates the "Run integration test" steps; benchmark-only callers set it to false.
run_integration_tests:
  description: "Run the regular integration suite"
  type: boolean
  required: false
  default: true
# Gates the RTF benchmark, upload, and summary steps.
run_rtf_benchmarks:
  description: "Run RTF performance benchmarks"
  type: boolean
  required: false
  default: true
# When non-empty, takes precedence over the per-runner matrix.benchmark_matrix_json.
benchmark_matrix_json:
  description: "Optional JSON array overriding the per-runner benchmark matrix"
  type: string
  required: false
workdir:
description: "Relative path to package directory in monorepo"
required: false
Expand Down Expand Up @@ -47,21 +75,33 @@ jobs:
# One entry per runner. benchmark_matrix_json is a JSON array (kept as a folded
# string) that is handed to the RTF benchmark step through
# QVAC_WHISPER_BENCHMARK_MATRIX_JSON unless the workflow input overrides it.
- os: ubuntu-22.04
  platform: linux
  arch: x64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"}]
# GPU runner: benchmarks both the CPU and the CUDA configuration.
- os: ai-run-linux-gpu
  platform: linux
  arch: x64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"cuda"}]
- os: ubuntu-24.04-arm
  platform: linux
  arch: arm64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"}]
- os: macos-14-xlarge
  platform: darwin
  arch: arm64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"coreml"}]
- os: macos-15-large
  platform: darwin
  arch: x64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"coreml"}]
- os: windows-2022
  platform: win32
  arch: x64
  benchmark_matrix_json: >-
    [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"directml"}]

steps:
- name: Setup Node.js
Expand Down Expand Up @@ -206,21 +246,94 @@ jobs:
brew install --quiet openblas lapack fftw

# Runs the integration suite on non-Windows runners unless the caller disabled it.
# The step carries a single `if`; a mapping must not define the key twice.
- name: Run integration test (Unix)
  if: ${{ matrix.platform != 'win32' && inputs.run_integration_tests != false }}
  working-directory: ${{ inputs.workdir }}
  shell: bash
  run: npm run test:integration
  env:
    GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}

# Runs the integration suite on Windows runners unless the caller disabled it.
# The step carries a single `if`; a mapping must not define the key twice.
- name: Run integration test (Windows)
  if: ${{ matrix.platform == 'win32' && inputs.run_integration_tests != false }}
  working-directory: ${{ inputs.workdir }}
  shell: powershell
  run: npm run test:integration
  env:
    GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}

# Runs the RTF benchmark matrix on non-Windows runners. always() lets it run even
# if an earlier step failed, and continue-on-error keeps a benchmark failure from
# failing the job.
- name: Run RTF benchmark (Unix)
  if: ${{ always() && matrix.platform != 'win32' && inputs.run_rtf_benchmarks != false }}
  continue-on-error: true
  working-directory: ${{ inputs.workdir }}
  shell: bash
  run: node scripts/run-rtf-benchmark-matrix.js
  env:
    GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
    # Device and runner labels are both set to the runner image name.
    QVAC_WHISPER_BENCHMARK_DEVICE: ${{ matrix.os }}
    QVAC_WHISPER_BENCHMARK_RUNNER: ${{ matrix.os }}
    # A non-empty workflow input wins over the per-runner default.
    QVAC_WHISPER_BENCHMARK_MATRIX_JSON: ${{ inputs.benchmark_matrix_json || matrix.benchmark_matrix_json }}

# Runs the RTF benchmark matrix on Windows runners. always() lets it run even if
# an earlier step failed, and continue-on-error keeps a benchmark failure from
# failing the job.
- name: Run RTF benchmark (Windows)
  if: ${{ always() && matrix.platform == 'win32' && inputs.run_rtf_benchmarks != false }}
  continue-on-error: true
  working-directory: ${{ inputs.workdir }}
  shell: powershell
  run: node scripts/run-rtf-benchmark-matrix.js
  env:
    GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
    # Device and runner labels are both set to the runner image name.
    QVAC_WHISPER_BENCHMARK_DEVICE: ${{ matrix.os }}
    QVAC_WHISPER_BENCHMARK_RUNNER: ${{ matrix.os }}
    # A non-empty workflow input wins over the per-runner default.
    QVAC_WHISPER_BENCHMARK_MATRIX_JSON: ${{ inputs.benchmark_matrix_json || matrix.benchmark_matrix_json }}

# Publishes the per-runner benchmark result files. The artifact name includes the
# matrix os/platform/arch so each matrix job uploads under a distinct name.
- name: Upload RTF results
  if: ${{ always() && inputs.run_rtf_benchmarks != false }}
  continue-on-error: true
  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
  with:
    name: rtf-results-${{ matrix.os }}-${{ matrix.platform }}-${{ matrix.arch }}
    path: ${{ inputs.workdir }}/benchmarks/results/rtf-benchmark-*.json
    retention-days: 30
    # A benchmark that produced no result file is not treated as an upload error.
    if-no-files-found: ignore

# Appends a markdown table of the RTF results found in benchmarks/results to the
# job summary. Exits quietly when there is no results directory or no result file.
- name: Add RTF summary
  if: ${{ always() && inputs.run_rtf_benchmarks != false }}
  continue-on-error: true
  working-directory: ${{ inputs.workdir }}
  shell: bash
  run: |
    echo "### RTF - ${{ matrix.platform }}-${{ matrix.arch }}" >> "$GITHUB_STEP_SUMMARY"
    # The script is passed inside a double-quoted shell string, so it must stay free
    # of double quotes, backticks, and shell-expandable variables.
    node -e "
      const fs = require('fs')
      const path = require('path')
      const resultsDir = path.resolve('benchmarks/results')
      if (!fs.existsSync(resultsDir)) process.exit(0)
      const files = fs.readdirSync(resultsDir)
        .filter((file) => file.startsWith('rtf-benchmark-') && file.endsWith('.json'))
        .sort()
      if (files.length === 0) process.exit(0)
      const lines = [
        '| Model | GPU | Backend | Device | Mean RTF | P50 | P95 |',
        '|-------|-----|---------|--------|----------|-----|-----|'
      ]
      for (const file of files) {
        let report
        try {
          report = JSON.parse(fs.readFileSync(path.join(resultsDir, file), 'utf8'))
        } catch (err) {
          // One truncated or malformed report must not drop the rows of the rest.
          console.error('Skipping unreadable RTF report ' + file + ': ' + err.message)
          continue
        }
        if (!report || typeof report !== 'object') continue
        const model = report.model && report.model.name ? report.model.name.replace(/\.bin$/, '') : 'unknown'
        const summary = report.summary || {}
        const rtf = summary.rtf || {}
        lines.push(
          '| ' + model +
          ' | ' + ((report.requested && report.requested.useGPU) ? 'yes' : 'no') +
          ' | ' + ((report.labels && report.labels.backend) || (report.requested && report.requested.backendHint) || 'n/a') +
          ' | ' + ((report.labels && (report.labels.device || report.labels.runner)) || 'n/a') +
          ' | ' + (rtf.mean !== undefined ? Number(rtf.mean).toFixed(4) : 'n/a') +
          ' | ' + (rtf.p50 !== undefined ? Number(rtf.p50).toFixed(4) : 'n/a') +
          ' | ' + (rtf.p95 !== undefined ? Number(rtf.p95).toFixed(4) : 'n/a') +
          ' |'
        )
      }
      console.log(lines.join('\n'))
    " >> "$GITHUB_STEP_SUMMARY"

- name: Print run state (Unix)
if: ${{ matrix.platform != 'win32' }}
working-directory: ${{ inputs.workdir }}
Expand Down
Loading
Loading