tetherto · GustavoA1604 · Apr 22, 2026 · Apr 21, 2026 · Apr 22, 2026
@@ -0,0 +1,118 @@
+name: Benchmark Performance (Whispercpp)
+
+# Manual trigger. `repository` and `ref` may be left empty; the context job
+# substitutes github.repository and github.ref_name for empty values.
+on:
+  workflow_dispatch:
+    inputs:
+      repository:
+        type: string
+        required: false
+        description: 'Repository to benchmark'
+      ref:
+        type: string
+        required: false
+        description: 'Git ref (branch/tag/SHA) to benchmark'
+      include_desktop:
+        type: boolean
+        required: false
+        default: true
+        description: 'Run desktop benchmark matrix'
+
+# Workflow-level default token permissions. The prebuild, desktop-benchmarks
+# and summarize jobs declare their own job-level `permissions` blocks.
+permissions:
+  contents: read
+  packages: read
+  id-token: write
+
+jobs:
+  # Resolves the repository/ref to benchmark and re-exports the dispatch inputs
+  # as job outputs for the downstream jobs.
+  context:
+    runs-on: ubuntu-latest
+    outputs:
+      include_desktop: ${{ steps.ctx.outputs.include_desktop }}
+      ref: ${{ steps.ctx.outputs.ref }}
+      repository: ${{ steps.ctx.outputs.repository }}
+    steps:
+      - id: ctx
+        shell: bash
+        env:
+          REPO: ${{ github.repository }}
+          REF_NAME: ${{ github.ref_name }}
+          INPUT_REPO: ${{ inputs.repository }}
+          INPUT_REF: ${{ inputs.ref }}
+          INPUT_INCLUDE_DESKTOP: ${{ inputs.include_desktop }}
+        run: |
+          # Empty inputs fall back to the repository/ref that triggered the run.
+          target_repo="${INPUT_REPO:-$REPO}"
+          target_ref="${INPUT_REF:-$REF_NAME}"
+          {
+            echo "repository=$target_repo"
+            echo "ref=$target_ref"
+            echo "include_desktop=${INPUT_INCLUDE_DESKTOP}"
+          } >> "$GITHUB_OUTPUT"
+
+  # Calls the reusable prebuilds workflow for the repository/ref resolved by the
+  # context job. The job-level permissions here replace the workflow-level
+  # defaults for this call, and all caller secrets are forwarded.
+  prebuild:
+    needs: context
+    permissions:
+      contents: write
+      packages: write
+      pull-requests: write
+      id-token: write
+    uses: ./.github/workflows/prebuilds-qvac-lib-infer-whispercpp.yml
+    secrets: inherit
+    with:
+      repository: ${{ needs.context.outputs.repository }}
+      ref: ${{ needs.context.outputs.ref }}
+
+  # Runs the integration-test workflow in benchmark-only mode (integration
+  # suite off, RTF benchmarks on) unless include_desktop resolved to 'false'.
+  desktop-benchmarks:
+    needs:
+      - context
+      - prebuild
+    if: ${{ needs.context.outputs.include_desktop != 'false' }}
+    uses: ./.github/workflows/integration-test-qvac-lib-infer-whispercpp.yml
+    with:
+      ref: ${{ needs.context.outputs.ref }}
+      repository: ${{ needs.context.outputs.repository }}
+      run_rtf_benchmarks: true
+      run_integration_tests: false
+    secrets: inherit
+    permissions:
+      id-token: write
+      packages: read
+      contents: read
+
+
+  # Collects the per-runner RTF artifacts and publishes one consolidated report
+  # (step summary + uploaded artifact). `if: always()` lets the job start even
+  # when desktop-benchmarks failed or was skipped.
+  summarize:
+    needs: [context, desktop-benchmarks]
+    if: always()
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      # The aggregation script and the manual results live in the benchmarked
+      # repository, so check out the same repository/ref that was benchmarked.
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          repository: ${{ needs.context.outputs.repository }}
+          ref: ${{ needs.context.outputs.ref }}
+          token: ${{ secrets.PAT_TOKEN }}
+
+      # Merges every rtf-results-* artifact into a single directory.
+      - name: Download desktop benchmark artifacts
+        if: needs.context.outputs.include_desktop != 'false'
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
+        with:
+          pattern: rtf-results-*
+          path: benchmark-artifacts/desktop
+          merge-multiple: true
+
+      - name: Generate consolidated benchmark report
+        run: |
+          # The download step is skipped when include_desktop is false, so make
+          # sure the input and output directories exist before aggregating.
+          mkdir -p benchmark-artifacts/desktop
+          node scripts/perf-report/aggregate-whisper-rtf.js \
+            --dir benchmark-artifacts/desktop \
+            --manual-dir packages/qvac-lib-infer-whispercpp/benchmarks/manual-results \
+            --output benchmark-artifacts/whisper-performance-findings.md \
+            --output-json benchmark-artifacts/whisper-performance-findings.json
+
+      - name: Add summary
+        run: cat benchmark-artifacts/whisper-performance-findings.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload consolidated benchmark report
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
+        with:
+          name: whisper-performance-findings
+          path: |
+            benchmark-artifacts/whisper-performance-findings.md
+            benchmark-artifacts/whisper-performance-findings.json
+          retention-days: 30
@@ -3,6 +3,14 @@ name: Benchmark (Whispercpp)
 on:
   workflow_dispatch:
     inputs:
+      # Optional checkout target. When left empty the checkout step falls back
+      # to github.repository / github.ref.
+      repository:
+        description: 'Repository to benchmark'
+        required: false
+        type: string
+      ref:
+        description: 'Git ref (branch/tag/SHA) to benchmark'
+        required: false
+        type: string
       dataset_type:
         description: 'Dataset type'
         required: true
@@ -292,6 +300,9 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
         with:
+          # Honour the optional dispatch inputs; an empty input falls back to
+          # the repository/ref that triggered the run.
+          repository: ${{ github.event.inputs.repository || github.repository }}
+          ref: ${{ github.event.inputs.ref || github.ref }}
+          token: ${{ secrets.PAT_TOKEN }}
           submodules: recursive
 
       - name: Configure scoped registry for @tetherto and @qvac packages
@@ -344,6 +355,25 @@ jobs:
           npm install -g bare-dev
           npm install -g bare bare-make
 
+      # Best-effort: fetch the `prebuilds` artifact from the latest successful
+      # prebuilds workflow run for the benchmarked repository/ref.
+      # continue-on-error keeps the job going when nothing is found; later
+      # steps branch on steps.prebuilds.outcome to decide between the local
+      # addon and the published npm package.
+      # NOTE(review): the ref input is documented as branch/tag/SHA but is
+      # passed to `gh run list --branch` - confirm tags/SHAs are expected to
+      # take the npm fallback path.
+      - name: Download prebuilds
+        id: prebuilds
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ secrets.PAT_TOKEN }}
+          BENCHMARK_REPOSITORY: ${{ github.event.inputs.repository || github.repository }}
+          BENCHMARK_REF: ${{ github.event.inputs.ref || github.ref_name }}
+        run: |
+          echo "Fetching prebuilds artifact from latest successful prebuilds run..."
+          RUN_ID=$(gh run list --repo "$BENCHMARK_REPOSITORY" --workflow=prebuilds-qvac-lib-infer-whispercpp.yml --branch "$BENCHMARK_REF" --status=success --limit 1 --json databaseId --jq '.[0].databaseId')
+          if [ -z "$RUN_ID" ]; then
+            echo "::warning::No successful prebuilds run found - will fall back to published npm package"
+            exit 1
+          fi
+
+          echo "Using prebuilds from run ID: $RUN_ID"
+          gh run download "$RUN_ID" --repo "$BENCHMARK_REPOSITORY" -n prebuilds -D "${{ env.WORKDIR }}/prebuilds"
+          ls -la "${{ env.WORKDIR }}/prebuilds/" || true
+
       - name: Clone custom model repository
         if: github.event.inputs.custom_model_repo != ''
         working-directory: ${{ env.WORKDIR }}
@@ -536,6 +566,7 @@ jobs:
             esac
             echo "Using default max_samples for $MODEL_SIZE: $MAX_SAMPLES"
           fi
+          # Export MAX_SAMPLES to later steps; the target path is quoted so the
+          # redirect is not subject to word splitting.
+          echo "MAX_SAMPLES=$MAX_SAMPLES" >> "$GITHUB_ENV"
 
           # Set timeout based on model size
           case "$MODEL_SIZE" in
@@ -604,9 +635,21 @@ jobs:
           echo "=== Updated ${CONFIG_FILE} ==="
           cat "$CONFIG_PATH"
 
+      # Only needed when the prebuilds download succeeded, i.e. when the
+      # benchmark server installs the addon from this checkout.
+      - name: Install main package dependencies
+        if: steps.prebuilds.outcome == 'success'
+        working-directory: ${{ env.WORKDIR }}
+        run: npm install
+
       - name: Install benchmark server dependencies
         working-directory: ${{ env.WORKDIR }}/benchmarks/server
-        run: npm install
+        # With downloaded prebuilds, install the addon from two directories up
+        # (the package root); otherwise keep the dependency resolved by the
+        # plain `npm install`.
+        run: |
+          npm install
+          if [ "${{ steps.prebuilds.outcome }}" = "success" ]; then
+            echo "Installing addon from local source (prebuilds available)..."
+            npm install ../../
+          else
+            echo "Using published addon dependency from npm..."
+          fi
 
       - name: Install benchmark client dependencies
         working-directory: ${{ env.WORKDIR }}/benchmarks/client
@@ -675,10 +718,6 @@ jobs:
           VAD_SUFFIX="no_vad"
           STREAMING_SUFFIX="batch"
 
-          if [ "${{ github.event.inputs.vad_enabled }}" = "true" ]; then
-            VAD_SUFFIX="vad"
-          fi
-
           if [ "${{ github.event.inputs.streaming_mode }}" = "true" ]; then
             STREAMING_SUFFIX="streaming"
           fi

@@ -7,6 +7,20 @@ on:
         description: "NPM package containing prebuilds (e.g. @qvac/transcription-whispercpp@1.0.0)"
         required: true
         type: string
+      # Suite toggles (both default to true) plus an optional JSON override for
+      # the per-runner benchmark matrix defined on the job's matrix entries.
+      run_integration_tests:
+        description: "Run the regular integration suite"
+        required: false
+        type: boolean
+        default: true
+      run_rtf_benchmarks:
+        description: "Run RTF performance benchmarks"
+        required: false
+        type: boolean
+        default: true
+      benchmark_matrix_json:
+        description: "Optional JSON array overriding the per-runner benchmark matrix"
+        required: false
+        type: string
       workdir:
         description: "Relative path to package directory in monorepo"
         type: string
@@ -19,6 +33,20 @@ on:
         type: string
       repository:
         type: string
+      # Same three inputs as declared on the other trigger: suite toggles
+      # (default true) and an optional benchmark matrix override.
+      run_integration_tests:
+        description: 'Run the regular integration suite'
+        required: false
+        type: boolean
+        default: true
+      run_rtf_benchmarks:
+        description: 'Run RTF performance benchmarks'
+        required: false
+        type: boolean
+        default: true
+      benchmark_matrix_json:
+        description: 'Optional JSON array overriding the per-runner benchmark matrix'
+        required: false
+        type: string
       workdir:
         description: "Relative path to package directory in monorepo"
         required: false
@@ -47,21 +75,33 @@ jobs:
+          # Each runner carries a default benchmark_matrix_json (a JSON array of
+          # modelFile/useGPU/backendHint entries). The RTF benchmark steps pass
+          # it as QVAC_WHISPER_BENCHMARK_MATRIX_JSON unless the
+          # benchmark_matrix_json input is set.
           - os: ubuntu-22.04
             platform: linux
             arch: x64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"}]
           - os: ai-run-linux-gpu
             platform: linux
             arch: x64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"cuda"}]
           - os: ubuntu-24.04-arm
             platform: linux
             arch: arm64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"}]
           - os: macos-14-xlarge
             platform: darwin
             arch: arm64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"coreml"}]
           - os: macos-15-large
             platform: darwin
             arch: x64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"coreml"}]
           - os: windows-2022
             platform: win32
             arch: x64
+            benchmark_matrix_json: >-
+              [{"modelFile":"ggml-tiny.bin","useGPU":false,"backendHint":"cpu"},{"modelFile":"ggml-tiny.bin","useGPU":true,"backendHint":"directml"}]
 
     steps:
       - name: Setup Node.js
@@ -206,21 +246,94 @@ jobs:
           brew install --quiet openblas lapack fftw
 
       - name: Run integration test (Unix)
-        if: ${{ matrix.platform != 'win32' }}
+        # Skip only on an explicit `false`. The input is stringified first
+        # because `null != false` is false under GitHub's loose equality (both
+        # coerce to 0), which would silently skip this step on any trigger
+        # that does not define the input.
+        if: ${{ matrix.platform != 'win32' && format('{0}', inputs.run_integration_tests) != 'false' }}
         working-directory: ${{ inputs.workdir }}
         shell: bash
         run: npm run test:integration
         env:
           GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
 
       - name: Run integration test (Windows)
-        if: ${{ matrix.platform == 'win32' }}
+        # Skip only on an explicit `false`. The input is stringified first
+        # because `null != false` is false under GitHub's loose equality (both
+        # coerce to 0), which would silently skip this step on any trigger
+        # that does not define the input.
+        if: ${{ matrix.platform == 'win32' && format('{0}', inputs.run_integration_tests) != 'false' }}
         working-directory: ${{ inputs.workdir }}
         shell: powershell
         run: npm run test:integration
         env:
           GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
 
+      # always() lets the benchmark run even if an earlier step failed, and
+      # continue-on-error keeps a benchmark failure from failing the job.
+      # The benchmark_matrix_json input, when set, overrides the runner's
+      # matrix default.
+      # NOTE(review): `inputs.run_rtf_benchmarks != false` is false when the
+      # input is null, so this step is skipped on triggers without inputs -
+      # confirm that is intended.
+      - name: Run RTF benchmark (Unix)
+        if: ${{ always() && matrix.platform != 'win32' && inputs.run_rtf_benchmarks != false }}
+        continue-on-error: true
+        working-directory: ${{ inputs.workdir }}
+        shell: bash
+        run: node scripts/run-rtf-benchmark-matrix.js
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
+          QVAC_WHISPER_BENCHMARK_DEVICE: ${{ matrix.os }}
+          QVAC_WHISPER_BENCHMARK_RUNNER: ${{ matrix.os }}
+          QVAC_WHISPER_BENCHMARK_MATRIX_JSON: ${{ inputs.benchmark_matrix_json || matrix.benchmark_matrix_json }}
+
+      # Windows counterpart of the Unix RTF benchmark step: same script and
+      # environment, run under PowerShell. always() lets it run after earlier
+      # step failures; continue-on-error keeps its own failure non-fatal.
+      # NOTE(review): `inputs.run_rtf_benchmarks != false` is false when the
+      # input is null, so this step is skipped on triggers without inputs -
+      # confirm that is intended.
+      - name: Run RTF benchmark (Windows)
+        if: ${{ always() && matrix.platform == 'win32' && inputs.run_rtf_benchmarks != false }}
+        continue-on-error: true
+        working-directory: ${{ inputs.workdir }}
+        shell: powershell
+        run: node scripts/run-rtf-benchmark-matrix.js
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
+          QVAC_WHISPER_BENCHMARK_DEVICE: ${{ matrix.os }}
+          QVAC_WHISPER_BENCHMARK_RUNNER: ${{ matrix.os }}
+          QVAC_WHISPER_BENCHMARK_MATRIX_JSON: ${{ inputs.benchmark_matrix_json || matrix.benchmark_matrix_json }}
+
+      # Publishes the per-runner result files as an `rtf-results-*` artifact.
+      # A run that produced no result files is tolerated
+      # (if-no-files-found: ignore, continue-on-error).
+      - name: Upload RTF results
+        if: ${{ always() && inputs.run_rtf_benchmarks != false }}
+        continue-on-error: true
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
+        with:
+          name: rtf-results-${{ matrix.os }}-${{ matrix.platform }}-${{ matrix.arch }}
+          path: ${{ inputs.workdir }}/benchmarks/results/rtf-benchmark-*.json
+          retention-days: 30
+          if-no-files-found: ignore
+
+      # Appends a per-runner heading to the step summary, followed by a
+      # markdown table (model, GPU, backend, device, mean/P50/P95 RTF) with one
+      # row per benchmarks/results/rtf-benchmark-*.json file. When the results
+      # directory or the files are missing, only the heading is written.
+      # The inline Node script sits inside a bash double-quoted string, so it
+      # must not contain double quotes or backticks.
+      - name: Add RTF summary
+        if: ${{ always() && inputs.run_rtf_benchmarks != false }}
+        continue-on-error: true
+        working-directory: ${{ inputs.workdir }}
+        shell: bash
+        run: |
+          echo "### RTF - ${{ matrix.platform }}-${{ matrix.arch }}" >> "$GITHUB_STEP_SUMMARY"
+          node -e "
+            const fs = require('fs')
+            const path = require('path')
+            const resultsDir = path.resolve('benchmarks/results')
+            if (!fs.existsSync(resultsDir)) process.exit(0)
+            const files = fs.readdirSync(resultsDir)
+              .filter((file) => file.startsWith('rtf-benchmark-') && file.endsWith('.json'))
+              .sort()
+            if (files.length === 0) process.exit(0)
+            const lines = [
+              '| Model | GPU | Backend | Device | Mean RTF | P50 | P95 |',
+              '|-------|-----|---------|--------|----------|-----|-----|'
+            ]
+            for (const file of files) {
+              const report = JSON.parse(fs.readFileSync(path.join(resultsDir, file), 'utf8'))
+              const model = report.model && report.model.name ? report.model.name.replace(/\.bin$/, '') : 'unknown'
+              const summary = report.summary || {}
+              const rtf = summary.rtf || {}
+              lines.push(
+                '| ' + model +
+                ' | ' + ((report.requested && report.requested.useGPU) ? 'yes' : 'no') +
+                ' | ' + ((report.labels && report.labels.backend) || (report.requested && report.requested.backendHint) || 'n/a') +
+                ' | ' + ((report.labels && (report.labels.device || report.labels.runner)) || 'n/a') +
+                ' | ' + (rtf.mean !== undefined ? Number(rtf.mean).toFixed(4) : 'n/a') +
+                ' | ' + (rtf.p50 !== undefined ? Number(rtf.p50).toFixed(4) : 'n/a') +
+                ' | ' + (rtf.p95 !== undefined ? Number(rtf.p95).toFixed(4) : 'n/a') +
+                ' |'
+              )
+            }
+            console.log(lines.join('\n'))
+          " >> "$GITHUB_STEP_SUMMARY"
+
       - name: Print run state (Unix)
         if: ${{ matrix.platform != 'win32' }}
         working-directory: ${{ inputs.workdir }}