Connectum-Framework · intech · Apr 19, 2026 · Apr 19, 2026
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -0,0 +1,166 @@
+name: benchmark
+
+# Runs the benchmark matrix on PRs targeting main and on pushes to main
+# (baseline refresh) when they touch the paths listed under `on:`, and on
+# manual dispatch. On PRs, the job compares ops/s against the latest main
+# baseline and flags >5% throughput regressions and >10% memory regressions.
+#
+# Storage model (see benchmarks/baselines/README.md):
+#   - Artifacts hold the authoritative JSON for trend history (90-day
+#     retention for PRs, 365 days for the main baseline).
+#   - `benchmarks/baselines/main.json` is an in-repo quick-reference copy
+#     that gets refreshed by a follow-up chore PR after a merge to main.
+
+on:
+  # PRs into main that touch the protobuf package, the benchmarks, or this
+  # workflow file itself.
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "packages/protobuf/**"
+      - "benchmarks/**"
+      - ".github/workflows/benchmark.yaml"
+  # Pushes to main that touch the protobuf package or the benchmarks.
+  push:
+    branches:
+      - main
+    paths:
+      - "packages/protobuf/**"
+      - "benchmarks/**"
+  # Manual runs.
+  workflow_dispatch:
+
+# Least-privilege token scopes; any scope not listed here is set to `none`.
+permissions:
+  # Look up main-branch workflow runs and download the `bench-baseline-main`
+  # artifact from them.
+  actions: read
+  # Check out the repository.
+  contents: read
+  # Create/update the sticky benchmark report comment on the PR.
+  pull-requests: write
+
+env:
+  # Quoted so the values are strings at the YAML level as well.
+  DO_NOT_TRACK: "1"
+  NODE_VERSION: "22"
+
+# One active run per ref. Superseded PR runs are cancelled; runs for other
+# events (push to main, manual dispatch) are allowed to finish so a baseline
+# refresh is not aborted midway by the next merge.
+concurrency:
+  group: benchmark-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  # Single job: build, run the benchmark matrix, then either compare against
+  # the main baseline (pull_request) or publish a new baseline (push to main).
+  bench-matrix:
+    # The triggering event is included in the display name.
+    name: bench-matrix (${{ github.event_name }})
+    runs-on: ubuntu-latest
+    # Upper bound on the job's wall-clock time.
+    timeout-minutes: 25
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          # fetch-depth: 0 fetches the full history instead of a shallow clone.
+          # NOTE(review): no later step in this workflow checks out the base
+          # branch — when the artifact download fails, the fallback is the
+          # committed benchmarks/baselines/main.json — so confirm whether the
+          # benchmark scripts need history before keeping the full clone.
+          fetch-depth: 0
+
+      # Node version comes from the workflow-level NODE_VERSION variable.
+      - name: Setup Node ${{ env.NODE_VERSION }}
+        uses: actions/setup-node@v6
+        with:
+          cache: "npm"
+          node-version: ${{ env.NODE_VERSION }}
+
+      # Install from the lockfile without running package lifecycle scripts.
+      # HUSKY=0 presumably keeps Husky from installing git hooks — confirm.
+      - name: Install
+        env:
+          HUSKY: 0
+        run: npm ci --ignore-scripts
+
+      # Build the library package, then run the benchmark package's code
+      # generation task; both go through turbo with a package filter.
+      - name: Build @bufbuild/protobuf
+        run: |
+          npx turbo run build --filter=@bufbuild/protobuf
+
+      - name: Generate benchmark code (proto + pbjs)
+        run: |
+          npx turbo run generate --filter=@bufbuild/protobuf-benchmarks
+
+      # The script is handed `bench-results.json` as its argument; later steps
+      # read benchmarks/bench-results.json.
+      - name: Run benchmark matrix
+        run: bash scripts/run-matrix-ci.sh bench-results.json
+        working-directory: benchmarks
+
+      # Per-run results artifact: named after the PR number on pull_request
+      # events and after the commit SHA otherwise.
+      - name: Upload run artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: bench-results-${{ github.event.pull_request.number || github.sha }}
+          path: benchmarks/bench-results.json
+          retention-days: 90
+          # Fail loudly instead of warning if the results file was not written.
+          if-no-files-found: error
+
+      # ------------------------------------------------------------------
+      # Baseline acquisition (PR only). On `push` to main the run's own
+      # results become the new baseline — the `bench-baseline-main` upload
+      # at the end of this job is sufficient.
+      # ------------------------------------------------------------------
+
+      - name: Download latest main baseline artifact
+        if: github.event_name == 'pull_request'
+        id: dl-baseline
+        uses: dawidd6/action-download-artifact@v6
+        # A failed download must not fail the job; the next step falls back
+        # to the committed benchmarks/baselines/main.json.
+        continue-on-error: true
+        with:
+          # Look for `bench-baseline-main` among this workflow's runs on main.
+          workflow: benchmark.yaml
+          branch: main
+          name: bench-baseline-main
+          # Destination directory checked by the "Resolve baseline source" step.
+          path: benchmarks/baseline-download
+          search_artifacts: true
+          if_no_artifact_found: warn
+
+      # Pick the baseline file for the compare step. Preference order: the
+      # freshly downloaded main artifact, then the committed in-repo copy.
+      # If neither exists, no baseline file is written.
+      - name: Resolve baseline source
+        if: github.event_name == 'pull_request'
+        run: |
+          set -euo pipefail
+          downloaded=benchmarks/baseline-download/bench-results.json
+          committed=benchmarks/baselines/main.json
+          target=benchmarks/baseline-results.json
+
+          if [[ -f "$downloaded" ]]; then
+            cp "$downloaded" "$target"
+            echo "Using downloaded main artifact as baseline."
+            exit 0
+          fi
+
+          if [[ -f "$committed" ]]; then
+            cp "$committed" "$target"
+            echo "Using in-repo benchmarks/baselines/main.json as baseline."
+            exit 0
+          fi
+
+          echo "No baseline available — compare step will emit an informational report."
+
+      # Run the comparison script (with the baseline when one was resolved,
+      # with --no-baseline otherwise) and expose `status` as a step output:
+      # `regression` if the report contains "REGRESSION", `ok` otherwise.
+      - name: Compare PR against baseline
+        if: github.event_name == 'pull_request'
+        id: compare
+        working-directory: benchmarks
+        run: |
+          set -euo pipefail
+          # Arguments shared by both invocations, in their original order.
+          compare_args=(
+            --current=bench-results.json
+            --output=bench-report.md
+            --threshold-ops=5
+            --threshold-mem=10
+          )
+          if [[ -f baseline-results.json ]]; then
+            compare_args=(--baseline=baseline-results.json "${compare_args[@]}")
+          else
+            compare_args+=(--no-baseline)
+          fi
+          npx tsx scripts/compare-results.ts "${compare_args[@]}"
+
+          verdict=ok
+          if grep -q "REGRESSION" bench-report.md 2>/dev/null; then
+            verdict=regression
+          fi
+          echo "status=${verdict}" >> "$GITHUB_OUTPUT"
+
+      # Publish the report to the run's job summary so it stays visible even
+      # when the PR comment cannot be posted.
+      - name: Publish report to job summary
+        if: github.event_name == 'pull_request'
+        run: |
+          set -euo pipefail
+          if [[ -f benchmarks/bench-report.md ]]; then
+            cat benchmarks/bench-report.md >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      # Fork PRs get a read-only GITHUB_TOKEN on `pull_request` events, so the
+      # comment call would fail the job there; only comment on PRs whose head
+      # branch lives in this repository.
+      - name: Comment report on PR
+        if: >-
+          github.event_name == 'pull_request' &&
+          github.event.pull_request.head.repo.full_name == github.repository
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          # The header identifies the sticky comment to update on later runs.
+          header: benchmark-matrix
+          path: benchmarks/bench-report.md
+
+      # Emit a warning annotation (not a failure) when the compare step
+      # reported `status=regression`.
+      - name: Flag regression annotation
+        if: >-
+          github.event_name == 'pull_request' &&
+          steps.compare.outputs.status == 'regression'
+        run: >-
+          echo "::warning::Benchmark matrix flagged a regression. See the PR comment for the full table."
+
+      # ------------------------------------------------------------------
+      # Baseline refresh (push-to-main only). This run's results become the
+      # new authoritative baseline and are uploaded as a stable-named
+      # artifact so subsequent PR jobs can pull it.
+      # ------------------------------------------------------------------
+
+      - name: Upload baseline artifact (main only)
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: actions/upload-artifact@v4
+        with:
+          name: bench-baseline-main
+          path: benchmarks/bench-results.json
+          retention-days: 365
+          overwrite: true
+          # A missing results file must fail the run; otherwise the baseline
+          # would silently not be refreshed.
+          if-no-files-found: error
diff --git a/benchmarks/baselines/README.md b/benchmarks/baselines/README.md
@@ -0,0 +1,59 @@
+# Benchmark Baselines
+
+This directory holds **quick-reference baselines** — last-known-good throughput numbers for the matrix, committed alongside code so a local developer can run `scripts/compare-results.ts` without having to hit GitHub.
+
+## Storage model
+
+The authoritative baseline is an **Actions artifact**, not a file in the repo:
+
+| Where                                        | Role                                  | Retention        |
+|----------------------------------------------|---------------------------------------|------------------|
+| Artifact `bench-baseline-main`               | Source of truth for CI diffs          | 365 days         |
+| `benchmarks/baselines/main.json`             | Quick-reference for local dev + fallback when the artifact API is unreachable | tracked in git |
+| Artifact `bench-results-<pr-number>` (`bench-results-<sha>` for non-PR runs) | Historical trend per run | 90 days          |
+
+### Why two stores
+
+1. **Artifacts give trend history for free.** Downloading `bench-baseline-main@<run-id>` at any point in the past reconstructs the baseline of that day. No file churn in git.
+2. **A committed fallback de-risks the artifact dependency.** If GitHub artifact downloads are rate-limited or the download action times out, CI falls back to `main.json` so PRs are never blocked on infrastructure. The file does not have to be fresh to be useful — being right to within an order of magnitude is enough to flag a regression that a human can then investigate.
+3. **Local dev needs a zero-network path.** `npm run bench:matrix:ci` followed by `npm run bench:matrix:compare -- --baseline=baselines/main.json --current=bench-results.json` works entirely offline.
+
+## Update procedure
+
+`main.json` is refreshed **by hand via a one-line PR** after every merge to `main` whose benchmark numbers moved materially (>5% on any row). A follow-up iteration will automate this via a `benchmark-baseline-refresh` workflow that opens the PR from the push-to-main run, but until that lands, manual refresh is the policy.
+
+```bash
+# After a merge to main, pull the latest artifact:
+gh run download --name bench-baseline-main --dir /tmp/baseline
+cp /tmp/baseline/bench-results.json benchmarks/baselines/main.json
+
+# Commit on a chore/ branch and open a PR:
+git checkout -b chore/refresh-benchmark-baseline
+git add benchmarks/baselines/main.json
+git commit -m "chore(benchmarks): refresh main baseline"
+git push -u origin HEAD
+gh pr create --title "chore(benchmarks): refresh main baseline" \
+  --body "Manual refresh from the bench-baseline-main CI artifact."
+```
+
+## Format
+
+Every `*.json` in this directory is the structured payload written by `bench-matrix.ts` (last line of its stdout when run standalone, or the full file when run via `scripts/run-matrix-ci.sh`):
+
+```json
+{
+  "node": "v22.11.0",
+  "platform": "linux/x64",
+  "timestamp": "2026-04-19T18:00:00.000Z",
+  "results": [
+    {
+      "name": "SimpleMessage :: toBinary (pre-built, 19 B)",
+      "opsPerSec": 1065000,
+      "rme": 1.3,
+      "samples": 512
+    }
+  ]
+}
+```
+
+Field names are stable; additional fields are additive (e.g. a future `bytesPerOp` for memory tracking will not break existing consumers).
diff --git a/benchmarks/package.json b/benchmarks/package.json
@@ -11,6 +11,8 @@
     "bench:fromBinary": "tsx src/bench-fromBinary.ts",
     "bench:comparison": "tsx src/bench-comparison-protobufjs.ts",
     "bench:matrix": "tsx src/bench-matrix.ts",
+    "bench:matrix:ci": "bash scripts/run-matrix-ci.sh bench-results.json",
+    "bench:matrix:compare": "tsx scripts/compare-results.ts",
     "bench:memory": "node --expose-gc --import tsx src/bench-memory.ts",
     "bench:report": "tsx src/report.ts",
     "build": "../node_modules/typescript/bin/tsc --noEmit",