Azure · spboyer · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
@@ -0,0 +1,101 @@
+# Weekly end-to-end evaluation: builds azd from source, signs in to Azure,
+# runs the Waza "tasks/lifecycle/" evaluations, uploads the reports, and then
+# deletes the "rg-eval-*" resource groups.
+name: "Eval: E2E Lifecycle"
+
+on:
+  schedule:
+    # 6am UTC Monday
+    - cron: "0 6 * * 1"
+  workflow_dispatch:
+
+permissions:
+  # Required so azure/login can request an OIDC token.
+  id-token: write
+  contents: read
+
+# Serialize runs of this workflow. The cleanup step deletes every resource
+# group whose name starts with "rg-eval-", so overlapping runs (for example a
+# manual dispatch during the scheduled run) could delete each other's
+# resources mid-evaluation.
+concurrency:
+  group: eval-e2e-lifecycle
+  cancel-in-progress: false
+
+jobs:
+  e2e-lifecycle:
+    runs-on: ubuntu-latest
+    env:
+      # Unique per workflow run.
+      AZURE_ENV_NAME: eval-e2e-${{ github.run_id }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: "cli/azd/go.mod"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - name: Build azd
+        working-directory: cli/azd
+        run: go build -o ./azd .
+
+      # Put the directory holding the freshly built binary on PATH for later steps.
+      - name: Add azd to PATH
+        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"
+
+      - name: Azure Login (OIDC)
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+
+      - name: Install Waza CLI
+        run: npm install -g waza
+
+      - name: Install eval dependencies
+        working-directory: cli/azd/test/eval
+        run: npm ci
+
+      # continue-on-error keeps the job green when evaluations fail; the step is
+      # capped at 40 minutes.
+      - name: Run lifecycle evaluations
+        working-directory: cli/azd/test/eval
+        continue-on-error: true
+        timeout-minutes: 40
+        env:
+          COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
+          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+        run: waza run --executor copilot-sdk --filter "tasks/lifecycle/"
+
+      - name: Upload E2E results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-results-${{ github.run_id }}
+          path: cli/azd/test/eval/reports/
+          retention-days: 30
+
+      # Best-effort teardown: runs regardless of earlier failures and never fails
+      # the job itself.
+      - name: Cleanup Azure resources
+        if: always()
+        shell: pwsh
+        run: |
+          # Tag matching resource groups for deletion so the cleanup script
+          # can detect and remove resources that resist deletion.
+          $deleteAfter = (Get-Date).ToUniversalTime().AddHours(1).ToString('o')
+          $groupsToDelete = az group list --query "[?starts_with(name, 'rg-eval-')].name" -o tsv
+
+          foreach ($group in $groupsToDelete) {
+            az group update --name $group --set "tags.DeleteAfter=$deleteAfter" 2>$null
+          }
+
+          foreach ($group in $groupsToDelete) {
+            az group delete --name $group --yes --no-wait 2>$null
+          }
+        continue-on-error: true
@@ -0,0 +1,70 @@
+# Weekly aggregation: downloads the artifacts of the latest successful
+# "eval-waza.yml" and "eval-e2e.yml" runs on main and re-uploads them together
+# as a single artifact.
+name: "Eval: Weekly Report"
+
+on:
+  schedule:
+    # 8am UTC Monday, after E2E completes
+    - cron: "0 8 * * 1"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  # Needed to list workflow runs and download their artifacts.
+  actions: read
+
+jobs:
+  generate-report:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - name: Install eval dependencies
+        working-directory: cli/azd/test/eval
+        run: npm ci
+
+      - name: Download recent Waza artifacts
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          mkdir -p cli/azd/test/eval/reports/waza
+          # Ask the API for the newest successful run directly rather than
+          # filtering one page of mixed results client-side. stderr is left
+          # visible so API or auth failures show up in the log.
+          RUN_ID=$(gh api "repos/${{ github.repository }}/actions/workflows/eval-waza.yml/runs?branch=main&status=success&per_page=1" \
+            --jq '.workflow_runs[0].id // empty')
+          if [ -n "$RUN_ID" ]; then
+            gh run download "$RUN_ID" -D cli/azd/test/eval/reports/waza || echo "No waza artifacts found"
+          else
+            echo "No successful waza runs found, skipping"
+          fi
+
+      - name: Download recent E2E artifacts
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          mkdir -p cli/azd/test/eval/reports/e2e
+          # Newest successful E2E run, filtered server-side; stderr stays visible.
+          RUN_ID=$(gh api "repos/${{ github.repository }}/actions/workflows/eval-e2e.yml/runs?branch=main&status=success&per_page=1" \
+            --jq '.workflow_runs[0].id // empty')
+          if [ -n "$RUN_ID" ]; then
+            gh run download "$RUN_ID" -D cli/azd/test/eval/reports/e2e || echo "No e2e artifacts found"
+          else
+            echo "No successful e2e runs found, skipping"
+          fi
+
+      # TODO: Implement report generation script (scripts/generate-report.ts)
+      # that diffs Waza result JSON files and produces regression-issues.json.
+      # Once implemented, add a step to create GitHub issues from regressions.
+
+      - name: Upload aggregated artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-weekly-report-${{ github.run_id }}
+          path: cli/azd/test/eval/reports/
+          retention-days: 90
@@ -0,0 +1,76 @@
+# Pull-request checks for the eval suite: unit tests, Waza task validation,
+# and the Python grader tests.
+name: "Eval: Unit Tests"
+
+on:
+  pull_request:
+    paths:
+      - "cli/azd/test/eval/**"
+      - "cli/azd/internal/mcp/**"
+      - "cli/azd/cmd/mcp.go"
+      - "cli/azd/cmd/root.go"
+
+permissions:
+  contents: read
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: "cli/azd/go.mod"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - name: Build azd
+        working-directory: cli/azd
+        run: go build -o ./azd .
+
+      # Nothing else in this job puts waza on PATH, so without this step the
+      # validation step below would always take its "skipping" branch.
+      - name: Install Waza CLI
+        run: npm install -g waza
+
+      - name: Install eval dependencies
+        working-directory: cli/azd/test/eval
+        run: npm ci
+
+      - name: Run unit tests
+        working-directory: cli/azd/test/eval
+        run: npm run test:unit -- --ci
+
+      # Guard retained: if waza is still not on PATH, validation is skipped
+      # instead of failing the job.
+      - name: Validate Waza task YAML
+        working-directory: cli/azd/test/eval
+        run: |
+          if command -v waza &>/dev/null; then
+            npm run waza:validate
+          else
+            echo "waza CLI not installed, skipping YAML validation"
+          fi
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install grader test dependencies
+        run: pip install pytest
+
+      - name: Run grader tests
+        working-directory: cli/azd/test/eval/graders
+        run: python -m pytest test_graders.py -v
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-unit-results
+          path: cli/azd/test/eval/reports/
+          retention-days: 30
@@ -0,0 +1,55 @@
+# Scheduled Waza evaluation run against an azd binary built from this checkout.
+name: 'Eval: Waza Runs'
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Minute 0 of 05:00, 12:00 and 20:00 UTC, Tuesday through Saturday
+    - cron: '0 5,12,20 * * 2-6'
+
+permissions:
+  contents: read
+
+jobs:
+  waza-run:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: 'cli/azd/go.mod'
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Build azd
+        run: go build -o ./azd .
+        working-directory: cli/azd
+
+      - name: Add azd to PATH
+        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"
+
+      - name: Install Waza CLI
+        run: npm install -g waza
+
+      - name: Install eval dependencies
+        run: npm ci
+        working-directory: cli/azd/test/eval
+
+      - name: Run Waza evaluations
+        working-directory: cli/azd/test/eval
+        env:
+          COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
+        timeout-minutes: 30
+        run: waza run --executor copilot-sdk
+
+      # always() keeps the upload running when the evaluation step fails.
+      - name: Upload Waza results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          retention-days: 30
+          name: waza-results-${{ github.run_id }}
+          path: cli/azd/test/eval/reports/
@@ -367,6 +367,28 @@ overrides:
     words:
       - covdata
       - GOWORK
+  - filename: test/eval/README.md  # extra words accepted in the eval README
+    words:
+      - Waza
+      - waza
+      - urlopen
+  - filename: "test/eval/graders/*.py"  # extra words accepted in Python files under graders/
+    words:
+      - Waza
+      - waza
+      - hdrs
+      - mysite
+      - mydb
+  - filename: "test/eval/tasks/**/*.yaml"  # extra words accepted in YAML files under tasks/
+    words:
+      - authenticat  # NOTE(review): truncated stem, presumably intentional — confirm
+      - idempoten  # NOTE(review): truncated stem, presumably intentional — confirm
+  - filename: "test/eval/tests/human/*.test.ts"  # extra words accepted in tests/human test files
+    words:
+      - compdef
+      - badcfg
+      - provison  # NOTE(review): looks like a deliberate misspelling used as test input — confirm
+      - notacommand
 ignorePaths:
   - "**/*_test.go"
   - "**/mock*.go"

@@ -0,0 +1,10 @@
+# Directories that are never committed
+node_modules/
+dist/
+
+# Report files under reports/ are ignored; the trailing negation keeps
+# reports/.gitkeep eligible for tracking
+reports/junit.xml
+reports/*.json
+reports/*.md
+!reports/.gitkeep