NixOS · winterqt · May 16, 2025 · May 13, 2025 · May 13, 2025 · May 14, 2025
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -21,6 +21,8 @@ permissions: {}
 jobs:
   get-merge-commit:
     uses: ./.github/workflows/get-merge-commit.yml
+    with:
+      leader: true  # this workflow resolves the merge commit itself rather than waiting on another run
 
   outpaths:
     name: Outpaths

diff --git a/.github/workflows/get-merge-commit.yml b/.github/workflows/get-merge-commit.yml
@@ -5,6 +5,14 @@ on:
     paths:
       - .github/workflows/get-merge-commit.yml
   workflow_call:
+    inputs:
+      leader:  # true: compute the merge commit and upload it; false: fetch it from the leader workflow's run
+        description: "Whether this job should get the merge commit, or wait for another job to do so"
+        type: boolean
+      leaderName:  # workflow file name used to look up the leader's run via the REST API
+        description: "The name of the leader workflow"
+        type: string
+        default: "eval.yml"
     outputs:
       mergedSha:
         description: "The merge commit SHA"
@@ -22,8 +30,8 @@ jobs:
   resolve-merge-commit:
     runs-on: ubuntu-24.04-arm
     outputs:
-      mergedSha: ${{ steps.merged.outputs.mergedSha }}
-      targetSha: ${{ steps.merged.outputs.targetSha }}
+      mergedSha: ${{ steps.getCommits.outputs.mergedSha }}
+      targetSha: ${{ steps.getCommits.outputs.targetSha }}
       systems: ${{ steps.systems.outputs.systems }}
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -32,6 +40,7 @@ jobs:
           sparse-checkout: ci
 
       - name: Check if the PR can be merged and get the test merge commit
+        if: inputs.leader == true
         id: merged
         env:
           GH_TOKEN: ${{ github.token }}
@@ -44,14 +53,62 @@ jobs:
             pull_request*)
               if commits=$(base/ci/get-merge-commit.sh ${{ github.repository }} ${{ github.event.number }}); then
                 echo -e "Checking the commits:\n$commits"
-                echo "$commits" >> "$GITHUB_OUTPUT"
+                echo "$commits" >> checked-commits
               else
                 # Skipping so that no notifications are sent
                 echo "Skipping the rest..."
               fi
               ;;
           esac
 
+      - name: Upload commits to check as an artifact  # leader only: shares the checked-commits file with other runs
+        if: inputs.leader == true
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: checked-commits  # same name the "Download commit hashes from leader" step requests
+          path: checked-commits
+
+      - name: Get leader workflow's run ID  # non-leaders only: find the leader's latest run for this PR head, wait for its artifact
+        id: getLeaderRun
+        timeout-minutes: 5  # bounds the polling loop below
+        if: inputs.leader == false
+        env:
+          LEADER: ${{ inputs.leaderName }}
+          GH_TOKEN: ${{ github.token }}
+          REPOSITORY: ${{ github.repository }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          if ! run=$(gh api --method GET /repos/"$REPOSITORY"/actions/workflows/"$LEADER"/runs \
+            -f head_sha="$HEAD_SHA" \
+            --jq '.workflow_runs | sort_by(.run_started_at) | .[-1]') \
+            || [[ -z "$run" || "$run" == "null" ]]; then  # `.[-1]` of an empty run list is null, not an empty string
+            echo "Could not find a leader ($LEADER) workflow run for $HEAD_SHA"
+            exit 1
+          fi
+          echo "Leader is $(jq .html_url <<< "$run")"
+          runId=$(jq .id <<< "$run")
+          artifactCount=$(gh api --method GET /repos/"$REPOSITORY"/actions/runs/"$runId"/artifacts -f name=checked-commits --jq '.total_count')  # count only the artifact downloaded next
+          while (( artifactCount < 1 )); do
+            echo "Leader workflow didn't get merge commit yet, waiting 10 seconds before checking again"
+            sleep 10
+            artifactCount=$(gh api --method GET /repos/"$REPOSITORY"/actions/runs/"$runId"/artifacts -f name=checked-commits --jq '.total_count')
+          done
+
+          echo "leaderRunId=$runId" >> "$GITHUB_OUTPUT"
+
+      - name: Download commit hashes from leader  # non-leaders only: fetch checked-commits from the leader's run
+        if: inputs.leader == false
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: checked-commits
+          github-token: ${{ github.token }}
+          run-id: ${{ steps.getLeaderRun.outputs.leaderRunId }}
+
+      - name: Output commits  # republishes the contents of checked-commits as this step's outputs
+        id: getCommits
+        run: |
+          if [[ -f checked-commits ]]; then cat checked-commits >> "$GITHUB_OUTPUT"; fi  # file is absent when the merge-commit step wrote nothing (its "Skipping the rest..." branch); emit no outputs instead of failing
+
       - name: Load supported systems
         id: systems
         run: |

diff --git a/.github/workflows/release-checks.yml b/.github/workflows/release-checks.yml
@@ -0,0 +1,142 @@
+name: Release checks
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/release-checks.yml  # pull_request runs are limited to changes of this workflow file
+  pull_request_target:
+
+permissions: {}  # empty map: no token permissions are requested at the workflow level
+
+jobs:
+  get-merge-commit:
+    uses: ./.github/workflows/get-merge-commit.yml  # no `leader` input is passed; presumably this runs in follower mode — confirm the unset boolean evaluates to false
+
+  outpaths:
+    name: Outpaths
+    runs-on: ubuntu-24.04-arm
+    needs: [ get-merge-commit ]
+    strategy:
+      fail-fast: false  # a failure for one system must not cancel the other matrix jobs
+      matrix:
+        system: ${{ fromJSON(needs.get-merge-commit.outputs.systems) }}  # one job per entry of the `systems` JSON list
+    steps:
+      - name: Enable swap
+        run: |
+          sudo fallocate -l 10G /swap
+          sudo chmod 600 /swap
+          sudo mkswap /swap
+          sudo swapon /swap
+
+      - name: Check out the PR at the test merge commit
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          ref: ${{ needs.get-merge-commit.outputs.mergedSha }}
+          path: nixpkgs  # the nix-build step below refers to this directory as nixpkgs/ci
+
+      - name: Install Nix
+        uses: cachix/install-nix-action@526118121621777ccd86f79b04685a9319637641 # v31
+        with:
+          extra_nix_config: sandbox = true
+
+      - name: Evaluate the ${{ matrix.system }} output paths for all derivation attributes from a different path
+        env:
+          MATRIX_SYSTEM: ${{ matrix.system }}  # passed through env rather than interpolated into the script
+        run: |
+          nix-build nixpkgs/ci -A eval.releaseChecks.secondEval \
+            --argstr evalSystem "$MATRIX_SYSTEM" \
+            --arg chunkSize 10000
+          # If it uses too much memory, slightly decrease chunkSize
+
+      - name: Upload the output paths and eval stats
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: intermediate-${{ matrix.system }}  # matched by the `intermediate-*` download pattern in the release-checks job
+          path: result  # presumably the `result` link left by the nix-build step above — confirm
+
+  release-checks:
+    name: Release checks
+    runs-on: ubuntu-24.04-arm
+    needs: [ outpaths, get-merge-commit ]  # get-merge-commit is listed so its outputs (mergedSha, systems) are readable here
+    steps:
+      - name: Download output paths and eval stats from second eval for all systems
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          pattern: intermediate-*  # artifacts uploaded by the outpaths matrix jobs of this run
+          path: second
+
+      - name: Check out the PR at the test merge commit
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          ref: ${{ needs.get-merge-commit.outputs.mergedSha }}
+          fetch-depth: 2
+          path: nixpkgs
+
+      - name: Install Nix
+        uses: cachix/install-nix-action@526118121621777ccd86f79b04685a9319637641 # v31
+        with:
+          extra_nix_config: sandbox = true
+
+      - name: Get first eval run id  # finds the eval.yml run for this PR head and waits until it has concluded
+        id: evalRunId
+        run: |
+          # Get the latest eval.yml workflow run for the head commit
+          if ! run=$(gh api --method GET /repos/"$REPOSITORY"/actions/workflows/eval.yml/runs \
+            -f head_sha="$HEAD_SHA" \
+            --jq '.workflow_runs | sort_by(.run_started_at) | .[-1]') \
+            || [[ -z "$run" || "$run" == "null" ]]; then  # `.[-1]` of an empty run list is null, not an empty string
+            echo "Could not find an eval.yml workflow run for $HEAD_SHA, cannot make comparison"
+            exit 1
+          fi
+          echo "First eval is $(jq .html_url <<< "$run")"
+          runId=$(jq .id <<< "$run")
+          conclusion=$(jq -r .conclusion <<< "$run")
+
+          while [[ "$conclusion" == null || "$conclusion" == "" ]]; do  # no conclusion yet means the run is still in progress
+            echo "Workflow not done, waiting 10 seconds before checking again"
+            sleep 10
+            conclusion=$(gh api /repos/"$REPOSITORY"/actions/runs/"$runId" --jq '.conclusion')
+          done
+
+          if [[ "$conclusion" != "success" ]]; then
+            echo "Workflow was not successful (conclusion: $conclusion), cannot perform release checks"
+            exit 1
+          fi
+
+          echo "evalRunId=$runId" >> "$GITHUB_OUTPUT"
+        env:
+          REPOSITORY: ${{ github.repository }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Download output paths and eval stats from first eval for all systems
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        if: steps.evalRunId.outputs.evalRunId
+        with:
+          pattern: intermediate-*
+          path: first
+          github-token: ${{ github.token }}  # a token is supplied because the artifacts come from another workflow run
+          run-id: ${{ steps.evalRunId.outputs.evalRunId }}
+
+      - name: Run release checks  # compares the first (eval.yml) and second (this run) evaluation per system
+        if: steps.evalRunId.outputs.evalRunId
+        run: |
+          failed=  # stays empty unless a system's check fails
+          for system in $(echo -n "$SYSTEMS" | jq -r ".[]"); do
+            echo "::group::$system"
+
+            nix-build nixpkgs/ci -A eval.releaseChecks \
+              --argstr evalSystem "$system" \
+              --arg firstEval "./first/intermediate-$system" \
+              --arg secondEval "./second/intermediate-$system" \
+              --arg chunkSize 10000 || failed=1  # record the failure but keep checking the remaining systems
+
+            echo "::endgroup::"
+          done
+
+          if [[ $failed ]]; then
+            echo "Release checks failed for at least one system"
+            exit 1
+          fi
+        env:
+          SYSTEMS: ${{ needs.get-merge-commit.outputs.systems }}  # JSON array of system names, iterated with jq above
diff --git a/ci/OWNERS b/ci/OWNERS
@@ -15,11 +15,11 @@
 
 # CI
 /.github/*_TEMPLATE*                    @SigmaSquadron
-/.github/workflows                      @NixOS/Security @Mic92 @zowoq @infinisil @azuwis @wolfgangwalther
+/.github/workflows                      @NixOS/Security @Mic92 @zowoq @infinisil @azuwis @winterqt @wolfgangwalther
 /.github/workflows/check-format.yml     @infinisil @wolfgangwalther
 /.github/workflows/codeowners-v2.yml    @infinisil @wolfgangwalther
 /.github/workflows/nixpkgs-vet.yml      @infinisil @philiptaron @wolfgangwalther
-/ci                                     @infinisil @philiptaron @NixOS/Security @wolfgangwalther
+/ci                                     @infinisil @philiptaron @NixOS/Security @winterqt @wolfgangwalther
 /ci/OWNERS                              @infinisil @philiptaron
 
 # Development support

diff --git a/ci/eval/compare/cmp-stats.py b/ci/eval/compare/cmp-stats.py
@@ -122,6 +122,10 @@ def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.Data
                 "t_stat": t_stat
             })
 
+    if len(results) == 0:
+        print("⚠️  Skipping comparison: we don't have enough results to compare. (Run evals without `quickTest` set to compare stats.)")
+        exit(0)
+
     df = pd.DataFrame(results).sort_values("p_value")
     return df