diff --git a/.github/README-AI.md b/.github/README-AI.md index 9eeb1f0532ca..a1577597031e 100644 --- a/.github/README-AI.md +++ b/.github/README-AI.md @@ -204,15 +204,17 @@ Agents work with **time budgets as estimates for planning**, not hard deadlines: - **`pr-review/pr-preflight.md`** - Phase 1: Context gathering (phase doc, not a standalone skill) - **`pr-review/pr-gate.md`** - Phase 2: Test verification (phase doc, not a standalone skill) - **`pr-review/pr-report.md`** - Phase 4: Final recommendation (phase doc, not a standalone skill) -- **`agents/sandbox-agent.md`** - Sandbox agent for testing and experimentation -- **`agents/write-tests-agent.md`** - Test writing agent (dispatches to skills like write-ui-tests) +- **`agents/sandbox-agent.agent.md`** - Sandbox agent for testing and experimentation +- **`agents/write-tests-agent.agent.md`** - Test writing agent (dispatches to skills like write-ui-tests) +- **`agents/learn-from-pr.agent.md`** - Extracts lessons from PRs and applies improvements ### Agent Files Agent files in the `.github/agents/` directory: -- **`agents/sandbox-agent.md`** - Sandbox app testing and experimentation -- **`agents/write-tests-agent.md`** - Test writing (invokes skills like write-ui-tests) +- **`agents/sandbox-agent.agent.md`** - Sandbox app testing and experimentation +- **`agents/write-tests-agent.agent.md`** - Test writing (invokes skills like write-ui-tests) +- **`agents/learn-from-pr.agent.md`** - Extracts PR lessons and applies repo improvements ### Shared Instruction Files @@ -364,8 +366,8 @@ For issues or questions about the AI agent instructions: ## Metrics **Agent Files**: -- 4 agent files (pr-review skill.md, sandbox-agent.md, write-tests-agent.md) -- 5 skills (pr-review, try-fix, verify-tests-fail-without-fix, write-ui-tests, write-xaml-tests, azdo-build-investigator) + 3 phase docs (pr-preflight, pr-gate, pr-report) +- 3 agent files (sandbox-agent.agent.md, write-tests-agent.agent.md, learn-from-pr.agent.md) +- 15 
skills (pr-review, try-fix, verify-tests-fail-without-fix, write-ui-tests, write-xaml-tests, azdo-build-investigator, code-review, evaluate-pr-tests, find-reviewable-pr, issue-triage, learn-from-pr, pr-finalize, run-device-tests, run-helix-tests, run-integration-tests) + 3 phase docs (pr-preflight, pr-gate, pr-report) - All validated and consistent with consolidated structure **Automation**: diff --git a/.github/agents/learn-from-pr.md b/.github/agents/learn-from-pr.agent.md similarity index 100% rename from .github/agents/learn-from-pr.md rename to .github/agents/learn-from-pr.agent.md diff --git a/.github/agents/sandbox-agent.md b/.github/agents/sandbox-agent.agent.md similarity index 100% rename from .github/agents/sandbox-agent.md rename to .github/agents/sandbox-agent.agent.md diff --git a/.github/agents/write-tests-agent.md b/.github/agents/write-tests-agent.agent.md similarity index 100% rename from .github/agents/write-tests-agent.md rename to .github/agents/write-tests-agent.agent.md diff --git a/.github/plugin.json b/.github/plugin.json new file mode 100644 index 000000000000..687dc4af25d1 --- /dev/null +++ b/.github/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "dotnet-maui-repo", + "version": "0.1.0", + "description": "Skills and agents for the dotnet/maui repository.", + "skills": ["./skills/"] +} diff --git a/.github/workflows/skill-validation.yml b/.github/workflows/skill-validation.yml new file mode 100644 index 000000000000..1aa2241dd4cb --- /dev/null +++ b/.github/workflows/skill-validation.yml @@ -0,0 +1,1174 @@ +# Skill & agent validation for PRs touching .github/skills/ or .github/agents/. +# +# Two modes: +# 1. Static checks — run automatically on every PR that touches skills/agents. +# 2. LLM evaluation — runs automatically for contributor PRs, or can be +# triggered by a repo contributor posting "/evaluate-skills" on any PR. +# Requires COPILOT_GITHUB_TOKEN secret (Copilot API access). 
+#
+# Trigger model:
+#   - pull_request_target: runs in the base repo context with full permissions
+#     and secret access, even for fork PRs. Workflow YAML is always from the
+#     default branch (not the PR), ensuring security.
+#   - issue_comment (/evaluate-skills): same security model as pull_request_target.
+#     Always runs workflow YAML from the default branch.
+#
+# Security model:
+#   - Workflow YAML: always from the default branch (enforced by both triggers)
+#   - Validator binary: downloaded from dotnet/skills releases (trusted)
+#   - Skill/test content: checked out from the PR via sparse-checkout
+#     (only .github/skills, .github/agents and .github/plugin.json — markdown/YAML/JSON data files)
+#   - No PR code is compiled or executed
+#   - LLM evaluation: only runs for PRs from contributors with write+ access,
+#     or when explicitly triggered via /evaluate-skills by a contributor
+
+name: Skill Validation
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+    paths:
+      - '.github/skills/**'
+      - '.github/agents/**'
+      - '.github/plugin.json'
+      - '.github/workflows/skill-validation.yml'
+
+  issue_comment:  # every new comment starts a run; jobs filter on the /evaluate-skills prefix
+    types: [created]
+
+  workflow_dispatch:
+
+concurrency:  # group key: eval-<PR> for /evaluate-skills comments, noop-<PR>-<comment id> for other comments, pr-<PR> for PR events, else the run id
+  group: >-
+    skill-validation-${{
+      github.event_name == 'issue_comment'
+        && startsWith(github.event.comment.body, '/evaluate-skills')
+        && format('eval-{0}', github.event.issue.number)
+      || github.event_name == 'issue_comment'
+        && format('noop-{0}-{1}', github.event.issue.number, github.event.comment.id)
+      || github.event_name == 'pull_request_target'
+        && format('pr-{0}', github.event.pull_request.number)
+      || github.run_id
+    }}
+  cancel-in-progress: true  # a newer run in the same group cancels the older one
+
+permissions:  # workflow-level defaults; jobs that declare their own permissions block narrow these
+  contents: read
+  pull-requests: write
+  issues: write
+  statuses: write
+  checks: write
+
+env:
+  VALIDATOR_CACHE_PREFIX: skill-validator-linux-x64  # prefix of the actions/cache key for the validator binary
+
+jobs:
+  # ==========================================================================
+  # PR GATE (pull_request_target)
+  # Determine PR source, author permissions, and changed files.
+  # ==========================================================================
+  pr-gate:  # publishes PR head info, author permission level and changed skill/agent paths as job outputs
+    name: PR gate
+    if: github.event_name == 'pull_request_target'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+    outputs:
+      head_sha: ${{ github.event.pull_request.head.sha }}
+      head_repo: ${{ github.event.pull_request.head.repo.full_name }}
+      pr_number: ${{ github.event.pull_request.number }}
+      is_contributor: ${{ steps.perms.outputs.is_contributor }}
+      is_fork: ${{ steps.info.outputs.is_fork }}
+      changed_skills: ${{ steps.discover.outputs.changed_skills }}
+      has_skill_changes: ${{ steps.discover.outputs.has_skill_changes }}
+      has_agent_changes: ${{ steps.discover.outputs.has_agent_changes }}
+    steps:
+      - name: Determine fork status
+        id: info
+        env:
+          HEAD: ${{ github.event.pull_request.head.repo.full_name }}
+          BASE: ${{ github.event.pull_request.base.repo.full_name }}
+        run: |
+          IS_FORK=$([[ "$HEAD" != "$BASE" ]] && echo true || echo false)
+          echo "is_fork=$IS_FORK" >> "$GITHUB_OUTPUT"
+          echo "PR from $HEAD → $BASE (fork=$IS_FORK)"
+
+      - name: Check PR author permissions
+        id: perms
+        env:
+          GH_TOKEN: ${{ github.token }}
+          AUTHOR: ${{ github.event.pull_request.user.login }}
+        run: |
+          # AUTHOR is supplied through the step env above; it is only ever used as a quoted shell variable.
+          PERMISSION=$(gh api "repos/${{ github.repository }}/collaborators/${AUTHOR}/permission" \
+            --jq '.permission' 2>/dev/null || echo "none")  # a failed lookup is treated as no access
+          echo "PR author $AUTHOR has permission: $PERMISSION"
+          if [[ "$PERMISSION" == "admin" || "$PERMISSION" == "write" || "$PERMISSION" == "maintain" ]]; then
+            echo "is_contributor=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "is_contributor=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Discover changed files
+        id: discover
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          CHANGED=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}/files" \
+            --paginate --jq '.[].filename')
+          # Only paths inside a skill sub-directory count; files directly under .github/skills/ have no skill name.
+          SKILL_DIRS=$(echo "$CHANGED" | grep '^\.github/skills/[^/]*/' | \
+            sed 's|^\.github/skills/\([^/]*\)/.*|\1|' | sort -u || true)
+          AGENT_FILES=$(echo "$CHANGED" | grep '^\.github/agents/' || true)
+
+          echo "has_skill_changes=$( [ -n "$SKILL_DIRS" ] && echo true || echo false )" >> "$GITHUB_OUTPUT"
+          echo "has_agent_changes=$( [ -n "$AGENT_FILES" ] && echo true || echo false )" >> "$GITHUB_OUTPUT"
+
+          DELIM="EOF_$(openssl rand -hex 8)"  # random delimiter for the multi-line output below
+          echo "changed_skills<<$DELIM" >> "$GITHUB_OUTPUT"
+          echo "$SKILL_DIRS" >> "$GITHUB_OUTPUT"
+          echo "$DELIM" >> "$GITHUB_OUTPUT"
+
+          echo "Changed skills: $SKILL_DIRS"
+          echo "Changed agents: $AGENT_FILES"
+
+  # ==========================================================================
+  # SLASH COMMAND GATE (/evaluate-skills)
+  # ==========================================================================
+  slash-gate:  # fails unless the commenter has admin/write/maintain permission, then resolves the PR head
+    name: Gate (/evaluate-skills)
+    if: >-
+      github.event_name == 'issue_comment' &&
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/evaluate-skills')
+    runs-on: ubuntu-latest
+    outputs:
+      head_sha: ${{ steps.pr.outputs.head_sha }}
+      head_repo: ${{ steps.pr.outputs.head_repo }}
+      pr_number: ${{ steps.pr.outputs.pr_number }}
+    steps:
+      - name: Check commenter permissions
+        env:
+          GH_TOKEN: ${{ github.token }}
+          COMMENTER: ${{ github.event.comment.user.login }}
+        run: |
+          PERMISSION=$(gh api "repos/${{ github.repository }}/collaborators/${COMMENTER}/permission" \
+            --jq '.permission')
+          echo "Commenter $COMMENTER has permission: $PERMISSION"
+          if [[ "$PERMISSION" != "admin" && "$PERMISSION" != "write" && "$PERMISSION" != "maintain" ]]; then
+            echo "::error::User does not have write access"
+            exit 1
+          fi
+
+      - name: Get PR details
+        id: pr
+        env:
+          GH_TOKEN: ${{ github.token }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+        run: |
+          PR_NUMBER="$ISSUE_NUMBER"  # the comment's issue number is used as the PR number for the pulls API
+          PR_DATA=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}")
+          HEAD_SHA=$(echo "$PR_DATA" | jq -r '.head.sha')
+          HEAD_REPO=$(echo "$PR_DATA" | jq -r '.head.repo.full_name')
+          echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
+          echo "head_repo=${HEAD_REPO}" >> "$GITHUB_OUTPUT"
+          echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"
+
+      - name: Add reaction to comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+          COMMENT_ID: ${{ github.event.comment.id }}
+        run: |
+          gh api "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}/reactions" \
+            -X POST -f content='eyes' || true  # best-effort acknowledgement; never fails the job
+
+      - name: Set pending commit status
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          gh api "repos/${{ github.repository }}/statuses/${{ steps.pr.outputs.head_sha }}" \
+            -f state=pending \
+            -f context="skill-validation" \
+            -f description="Skill evaluation in progress..." \
+            -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  # ==========================================================================
+  # STATIC VALIDATION
+  # Always runs for PRs (all types) and slash-commands.
+  # ==========================================================================
+  static-check:
+    name: Static validation
+    needs: [pr-gate, slash-gate]
+    if: >-
+      always() && !cancelled() && (
+        needs.pr-gate.result == 'success' ||
+        needs.slash-gate.result == 'success' ||
+        github.event_name == 'workflow_dispatch'
+      )
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    outputs:
+      exit_code: ${{ steps.check.outputs.exit_code }}
+    steps:
+      - name: Checkout PR content
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo || github.repository }}
+          ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha || '' }}
+          sparse-checkout: |
+            .github/skills
+            .github/agents
+            .github/plugin.json
+          persist-credentials: false
+
+      # ── Download & cache skill-validator ──────────────────────────
+      - name: Get cache key date
+        id: cache-date
+        run: echo "date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"
+
+      - name: Restore skill-validator from cache
+        id: cache-sv
+        uses: actions/cache/restore@v4
+        with:
+          path: skill-validator-bin
+          key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
+          restore-keys: |
+            ${{ env.VALIDATOR_CACHE_PREFIX }}-
+
+      - name: Download skill-validator
+        if: steps.cache-sv.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p skill-validator-bin
+          curl -fsSL --retry 3 --retry-all-errors -o skill-validator.tar.gz \
+            https://github.com/dotnet/skills/releases/download/skill-validator-nightly/skill-validator-linux-x64.tar.gz
+          tar -xzf skill-validator.tar.gz -C skill-validator-bin
+          if [ ! -f skill-validator-bin/skill-validator ]; then
+            echo "::error::skill-validator binary not found after extraction"
+            exit 1
+          fi
+          chmod +x skill-validator-bin/skill-validator
+
+      - name: Save skill-validator to cache
+        if: steps.cache-sv.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: skill-validator-bin
+          key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
+
+      # ── Run skill-validator check ─────────────────────────────────
+      - name: Run skill-validator check
+        id: check
+        shell: bash
+        env:
+          CHANGED_SKILLS: ${{ needs.pr-gate.outputs.changed_skills }}
+        run: |
+          rc=0
+
+          if [ -d .github/skills ]; then
+            echo "::group::Validate skills"
+
+            # For PR path: validate only changed skills for efficiency
+            # For slash-command or workflow_dispatch: validate all
+            PR_GATE="${{ needs.pr-gate.result }}"
+            if [[ "$PR_GATE" == "success" ]]; then
+              SKILLS_ARG=()  # array, so PR-supplied directory names containing spaces stay single arguments
+              while IFS= read -r skill; do
+                [ -z "$skill" ] && continue
+                SKILL_DIR=".github/skills/$skill"
+                if [ -d "$SKILL_DIR" ]; then
+                  SKILLS_ARG+=(--skills "$SKILL_DIR")
+                fi
+              done <<< "$CHANGED_SKILLS"
+              # Fallback to all if no specific skills found
+              [ "${#SKILLS_ARG[@]}" -eq 0 ] && SKILLS_ARG=(--skills .github/skills)
+            else
+              SKILLS_ARG=(--skills .github/skills)
+            fi
+
+            set +e
+            skill-validator-bin/skill-validator check "${SKILLS_ARG[@]}" --allow-repo-traversal --verbose 2>&1 | tee skill-check-skills.txt
+            skills_rc=${PIPESTATUS[0]}  # exit status of the validator, not of tee
+            set -e
+            echo "::endgroup::"
+            if [ "$skills_rc" -ne 0 ]; then rc=1; fi
+          fi
+
+          if [ -d .github/agents ]; then
+            echo "::group::Validate agents"
+            set +e
+            skill-validator-bin/skill-validator check --agents .github/agents --verbose 2>&1 | tee skill-check-agents.txt
+            agents_rc=${PIPESTATUS[0]}
+            set -e
+            echo "::endgroup::"
+            if [ "$agents_rc" -ne 0 ]; then rc=1; fi
+          fi
+
+          cat skill-check-skills.txt skill-check-agents.txt > sv-output.txt 2>/dev/null || true
+          echo "exit_code=$rc" >> "$GITHUB_OUTPUT"
+
+          # Step summary
+          {
+            echo "## skill-validator check"
+            echo ""
+            skill_count=$(find .github/skills -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l)
+            agent_count=$(find .github/agents -name '*.agent.md' 2>/dev/null | wc -l)
+            if [ "$rc" -eq 0 ]; then
+              echo "All checks passed."
+              echo ""
+              echo "Validated **${skill_count}** skill(s) and **${agent_count}** agent(s)."
+            else
+              for f in skill-check-skills.txt skill-check-agents.txt; do
+                if [ -f "$f" ]; then
+                  echo "### ${f}"
+                  echo '```'
+                  head -n 200 "$f"
+                  echo '```'
+                  echo ""
+                fi
+              done
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      # ── Upload results for comment job ────────────────────────────
+      - name: Save results artifact
+        if: always()
+        run: |
+          mkdir -p sv-results
+          skill_count=$(find .github/skills -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l)
+          agent_count=$(find .github/agents -name '*.agent.md' 2>/dev/null | wc -l)
+          echo "$skill_count" > sv-results/skill-count.txt
+          echo "$agent_count" > sv-results/agent-count.txt
+          echo "${{ steps.check.outputs.exit_code }}" > sv-results/exit-code.txt
+          if [ -f sv-output.txt ]; then
+            cp sv-output.txt sv-results/sv-output.txt
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: static-check-results
+          path: sv-results/
+          retention-days: 1
+
+      - name: Fail if checks failed
+        if: steps.check.outputs.exit_code != '0'
+        run: exit 1
+
+  # 
==========================================================================
+  # DISCOVER EVALUATABLE SKILLS
+  # Only runs when LLM eval should happen (contributor PR or slash-command).
+  # ==========================================================================
+  discover-eval:  # emits a JSON matrix of skills that have tests/eval.yaml
+    name: Discover skills to evaluate
+    needs: [pr-gate, slash-gate]
+    if: >-
+      always() && !cancelled() && (
+        (needs.pr-gate.result == 'success' && needs.pr-gate.outputs.is_contributor == 'true') ||
+        needs.slash-gate.result == 'success'
+      )
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    outputs:
+      entries: ${{ steps.find.outputs.entries }}
+      has_entries: ${{ steps.find.outputs.has_entries }}
+    steps:
+      - name: Checkout PR content
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo }}
+          ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
+          sparse-checkout: |
+            .github/skills
+            .github/plugin.json
+          persist-credentials: false
+
+      - name: Discover changed files
+        id: changed
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ needs.pr-gate.outputs.pr_number || needs.slash-gate.outputs.pr_number }}
+        run: |
+          CHANGED=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}/files" \
+            --paginate --jq '.[].filename')
+          # Only paths inside a skill sub-directory count; files directly under .github/skills/ have no skill name.
+          SKILL_DIRS=$(echo "$CHANGED" | grep '^\.github/skills/[^/]*/' | \
+            sed 's|^\.github/skills/\([^/]*\)/.*|\1|' | sort -u || true)
+
+          # Check for workflow changes (evaluate all skills with tests)
+          WORKFLOW_CHANGES=$(echo "$CHANGED" | grep '^\.github/workflows/skill-validation' || true)
+
+          DELIM="EOF_$(openssl rand -hex 8)"  # random delimiter for the multi-line output below
+          echo "skill_dirs<<$DELIM" >> "$GITHUB_OUTPUT"
+          echo "$SKILL_DIRS" >> "$GITHUB_OUTPUT"
+          echo "$DELIM" >> "$GITHUB_OUTPUT"
+
+          if [ -n "$WORKFLOW_CHANGES" ]; then
+            echo "eval_all=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "eval_all=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Find skills with eval tests
+        id: find
+        shell: pwsh
+        env:
+          SKILL_DIRS: ${{ steps.changed.outputs.skill_dirs }}
+          EVAL_ALL: ${{ steps.changed.outputs.eval_all }}
+        run: |
+          $entries = @()
+          $evalAll = $env:EVAL_ALL -eq "true"
+
+          if ($evalAll) {
+            Write-Host "Workflow changes detected - evaluating all skills with tests"
+            $skills = @(Get-ChildItem -Path ".github/skills" -Directory |
+              Select-Object -ExpandProperty Name)
+          } else {
+            $raw = "$env:SKILL_DIRS"  # interpolation turns an unset/null variable into '' so .Split() cannot throw
+            $skills = @($raw.Split("`n", [StringSplitOptions]::RemoveEmptyEntries) |
+              ForEach-Object { $_.Trim() } |
+              Where-Object { $_ })
+          }
+
+          foreach ($skill in $skills) {
+            $evalFile = ".github/skills/$skill/tests/eval.yaml"
+            if (Test-Path $evalFile) {
+              Write-Host " -> $skill has eval tests"
+              $entries += @{
+                name = $skill
+                skills_path = ".github/skills/$skill"
+                tests_path = ".github/skills/$skill/tests"
+              }
+            } else {
+              Write-Host " -> $skill has NO eval tests (static-only)"
+            }
+          }
+
+          if ($entries.Count -eq 0) {
+            Write-Host "No skills with eval tests to evaluate"
+            echo "entries=[]" >> $env:GITHUB_OUTPUT
+            echo "has_entries=false" >> $env:GITHUB_OUTPUT
+          } else {
+            $json = $entries | ConvertTo-Json -Compress -AsArray  # -AsArray keeps a single entry as a JSON array for the matrix
+            Write-Host "Entries to evaluate: $json"
+            echo "entries=$json" >> $env:GITHUB_OUTPUT
+            echo "has_entries=true" >> $env:GITHUB_OUTPUT
+          }
+
+  # ==========================================================================
+  # LLM EVALUATION (matrix)
+  # Runs skill-validator evaluate for each changed skill with eval tests.
+  # ==========================================================================
+  evaluate:  # one matrix job per entry produced by discover-eval
+    name: evaluate (${{ matrix.entry.name }})
+    needs: [pr-gate, slash-gate, discover-eval]
+    if: >-
+      always() && !cancelled() &&
+      needs.discover-eval.result == 'success' &&
+      needs.discover-eval.outputs.has_entries == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    timeout-minutes: 120
+    strategy:
+      fail-fast: false
+      matrix:
+        entry: ${{ fromJson(needs.discover-eval.outputs.entries || '[]') }}
+    steps:
+      - name: Checkout PR content
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo }}
+          ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
+          sparse-checkout: |
+            .github/skills
+            .github/plugin.json
+          persist-credentials: false
+
+      # ── Prepare test directory layout ─────────────────────────────
+      # skill-validator evaluate expects tests at <tests-dir>/<skill-name>/eval.yaml
+      # but maui keeps them co-located at .github/skills/<skill-name>/tests/eval.yaml.
+      # Create a flat tests directory by copying files to match the expected layout.
+      - name: Prepare test directory
+        run: |
+          mkdir -p eval-tests
+          for dir in .github/skills/*/tests; do
+            [ -d "$dir" ] || continue
+            [ -f "$dir/eval.yaml" ] || continue
+            skill=$(basename "$(dirname "$dir")")
+            mkdir -p "eval-tests/$skill"
+            # Copy eval.yaml and any fixture files ("/." also picks up hidden files, which "/*" skips)
+            cp -r "$dir"/. "eval-tests/$skill/"
+          done
+          echo "Prepared test directories:"
+          find eval-tests -name 'eval.yaml' | sort
+
+      # ── Download & cache skill-validator ──────────────────────────
+      - name: Get cache key date
+        id: cache-date
+        run: echo "date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"
+
+      - name: Restore skill-validator from cache
+        id: cache-sv
+        uses: actions/cache/restore@v4
+        with:
+          path: skill-validator-bin
+          key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
+          restore-keys: |
+            ${{ env.VALIDATOR_CACHE_PREFIX }}-
+
+      - name: Download skill-validator
+        if: steps.cache-sv.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p skill-validator-bin
+          curl -fsSL --retry 3 --retry-all-errors -o skill-validator.tar.gz \
+            https://github.com/dotnet/skills/releases/download/skill-validator-nightly/skill-validator-linux-x64.tar.gz
+          tar -xzf skill-validator.tar.gz -C skill-validator-bin
+          if [ ! -f skill-validator-bin/skill-validator ]; then
+            echo "::error::skill-validator binary not found after extraction"
+            exit 1
+          fi
+          chmod +x skill-validator-bin/skill-validator
+
+      - name: Save skill-validator to cache
+        if: steps.cache-sv.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: skill-validator-bin
+          key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
+
+      # ── Select Copilot token ──────────────────────────────────────
+      - name: Select Copilot token
+        id: select-token
+        env:
+          TOKEN_1: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+          TOKEN_2: ${{ secrets.COPILOT_GITHUB_TOKEN_2 }}
+          TOKEN_3: ${{ secrets.COPILOT_GITHUB_TOKEN_3 }}
+        run: |
+          TOKENS=()
+          NAMES=()
+          for i in 1 2 3; do
+            var="TOKEN_$i"
+            val="${!var}"  # indirect expansion: value of TOKEN_1 / TOKEN_2 / TOKEN_3
+            if [ -n "$val" ]; then
+              TOKENS+=("$val")
+              if [ "$i" -eq 1 ]; then
+                NAMES+=("COPILOT_GITHUB_TOKEN")
+              else
+                NAMES+=("COPILOT_GITHUB_TOKEN_$i")
+              fi
+            fi
+          done
+
+          if [ ${#TOKENS[@]} -eq 0 ]; then
+            echo "::error::No COPILOT_GITHUB_TOKEN secrets are configured"
+            exit 1
+          fi
+
+          JOB_INDEX="${{ strategy.job-index }}"
+          RUN_ID="${{ github.run_id }}"
+          if [ -n "$JOB_INDEX" ] && [ ${#TOKENS[@]} -gt 1 ]; then
+            IDX=$(( (JOB_INDEX + RUN_ID) % ${#TOKENS[@]} ))  # spreads matrix jobs across the configured tokens
+          elif [ -n "$JOB_INDEX" ]; then
+            IDX=0
+          else
+            IDX=$((RANDOM % ${#TOKENS[@]}))
+          fi
+          echo "Selected ${NAMES[$IDX]} (1 of ${#TOKENS[@]} available tokens, job-index=${JOB_INDEX:-random})"
+
+          echo "::add-mask::${TOKENS[$IDX]}"
+          echo "token=${TOKENS[$IDX]}" >> "$GITHUB_OUTPUT"
+
+      # ── Run LLM evaluation ───────────────────────────────────────
+      - name: Run skill-validator evaluate
+        id: eval-run
+        env:
+          COPILOT_TOKEN: ${{ steps.select-token.outputs.token }}
+          RESULTS_PATH: eval-results/${{ matrix.entry.name }}
+          SKILLS_PATH: ${{ matrix.entry.skills_path }}
+        run: |
+          # skill-validator reads GITHUB_TOKEN for API access
+          export GITHUB_TOKEN="$COPILOT_TOKEN"
+
+          ARGS=(--verdict-warn-only --verbose)  # array, so the PR-derived results path stays one argument
+          ARGS+=(--results-dir "$RESULTS_PATH" --reporter console --reporter json --reporter markdown)
+          ARGS+=(--model claude-opus-4.6)
+          ARGS+=(--judge-model claude-opus-4.6)
+          ARGS+=(--runs 3)
+          ARGS+=(--parallel-skills 2)
+          ARGS+=(--parallel-scenarios 3)
+          ARGS+=(--parallel-runs 3)
+
+          set +e
+          skill-validator-bin/skill-validator evaluate "${ARGS[@]}" \
+            --tests-dir eval-tests \
+            "$SKILLS_PATH"
+          EVAL_RC=$?
+          set -e
+
+          echo "eval_exit_code=$EVAL_RC" >> "$GITHUB_OUTPUT"
+
+          # Determine actual pass/fail from results.json (the source of truth)
+          RESULTS_JSON=$(find "$RESULTS_PATH" -name 'results.json' -type f | head -1)
+          if [ -n "$RESULTS_JSON" ]; then
+            ALL_PASSED=$(jq 'if .verdicts | length == 0 then false else all(.verdicts[]; .passed) end' "$RESULTS_JSON")
+            echo "eval_passed=$ALL_PASSED" >> "$GITHUB_OUTPUT"
+          else
+            echo "eval_passed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: skill-eval-results-${{ matrix.entry.name }}
+          path: eval-results/${{ matrix.entry.name }}/
+          include-hidden-files: true
+          retention-days: 14
+
+  # ==========================================================================
+  # POST PR COMMENT
+  # Consolidated results (static + eval) posted directly to the PR.
+  # pull_request_target has write permissions, so no separate workflow needed.
+ # ========================================================================== + comment: + name: Post results comment + needs: [pr-gate, slash-gate, static-check, discover-eval, evaluate] + if: >- + always() && !cancelled() && ( + needs.pr-gate.result == 'success' || + needs.slash-gate.result == 'success' + ) + runs-on: ubuntu-latest + permissions: + pull-requests: write + issues: write + outputs: + eval_passed: ${{ steps.post-comment.outputs.eval_passed }} + steps: + - name: Download static check results + uses: actions/download-artifact@v4 + with: + name: static-check-results + path: static-results/ + continue-on-error: true + + - name: Download eval result artifacts + if: needs.evaluate.result == 'success' || needs.evaluate.result == 'failure' + uses: actions/download-artifact@v4 + with: + pattern: skill-eval-results-* + path: eval-results/ + merge-multiple: false + continue-on-error: true + + - name: Post comment + id: post-comment + uses: actions/github-script@v7 + env: + PR_NUMBER: ${{ needs.pr-gate.outputs.pr_number || needs.slash-gate.outputs.pr_number }} + STATIC_RESULT: ${{ needs.static-check.result }} + EVAL_RESULT: ${{ needs.evaluate.result }} + HAS_ENTRIES: ${{ needs.discover-eval.outputs.has_entries }} + DISCOVER_RESULT: ${{ needs.discover-eval.result }} + IS_CONTRIBUTOR: ${{ needs.pr-gate.outputs.is_contributor || 'true' }} + with: + script: | + const fs = require('fs'); + const path = require('path'); + + const prNumber = parseInt(process.env.PR_NUMBER, 10); + const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + const marker = ''; + + const staticResult = process.env.STATIC_RESULT; + const evalResult = process.env.EVAL_RESULT; + const hasEntries = process.env.HAS_ENTRIES === 'true'; + const discoverResult = process.env.DISCOVER_RESULT; + const isContributor = process.env.IS_CONTRIBUTOR === 'true'; + const evalRan = discoverResult === 'success'; + + const lines = [marker, '## 🔍 Skill 
Validation Results', '']; + + // ── Static check section ────────────────────────────── + let staticOutput = ''; + try { + if (fs.existsSync('static-results/sv-output.txt')) { + staticOutput = fs.readFileSync('static-results/sv-output.txt', 'utf8') + .replace(/\x1b\[[0-9;]*m/g, '').trim(); + } + } catch (e) { /* ignore */ } + + const exitCode = (() => { + try { return fs.readFileSync('static-results/exit-code.txt', 'utf8').trim(); } + catch { return '?'; } + })(); + const skillCount = (() => { + try { return fs.readFileSync('static-results/skill-count.txt', 'utf8').trim(); } + catch { return '?'; } + })(); + const agentCount = (() => { + try { return fs.readFileSync('static-results/agent-count.txt', 'utf8').trim(); } + catch { return '?'; } + })(); + + if (staticResult === 'success') { + lines.push('### ✅ Static Checks Passed'); + } else if (staticResult === 'failure') { + lines.push('### ❌ Static Checks Failed'); + } else { + lines.push(`### ⚠️ Static Checks: ${staticResult}`); + } + lines.push(`Skills checked: ${skillCount} | Agents checked: ${agentCount}`); + lines.push(''); + + if (staticOutput) { + const findings = staticOutput.split('\n') + .map(l => l.trim()) + .filter(l => /^[❌⚠ℹ]/.test(l)) + .slice(0, 10); + + if (findings.length > 0) { + lines.push('| Level | Finding |'); + lines.push('|---|---|'); + for (const line of findings) { + const level = line.startsWith('❌') ? '❌' + : line.startsWith('⚠') ? '⚠️' + : 'ℹ️'; + const text = line.replace(/^[❌⚠ℹ️\s]+/, '').replace(/\|/g, '\\|'); + lines.push(`| ${level} | ${text} |`); + } + lines.push(''); + } + + lines.push('
'); + lines.push('Full validator output'); + lines.push(''); + lines.push('```text'); + lines.push(staticOutput.replace(/```/g, '` ` `')); + lines.push('```'); + lines.push(''); + lines.push('
'); + lines.push(''); + } + + // ── Parse eval results from JSON ────────────────────── + // Read results.json files from downloaded artifacts to determine + // actual pass/fail (the source of truth, not the job exit code + // which uses --verdict-warn-only). + let allVerdicts = []; + let evalPassed = true; + let hasResults = false; + const footnotes = []; + + if (fs.existsSync('eval-results')) { + try { + const resultDirs = fs.readdirSync('eval-results').filter(d => + fs.statSync(path.join('eval-results', d)).isDirectory() + ); + + for (const dir of resultDirs) { + const dirPath = path.join('eval-results', dir); + // Recursively find results.json + const allFiles = []; + function walkDir(d) { + for (const f of fs.readdirSync(d)) { + const fp = path.join(d, f); + if (fs.statSync(fp).isDirectory()) walkDir(fp); + else allFiles.push(path.relative(dirPath, fp)); + } + } + walkDir(dirPath); + + const jsonFile = allFiles.find(f => f.endsWith('results.json')); + if (jsonFile) { + hasResults = true; + const data = JSON.parse( + fs.readFileSync(path.join(dirPath, jsonFile), 'utf8') + ); + if (data.verdicts && data.verdicts.length > 0) { + allVerdicts.push(...data.verdicts); + for (const v of data.verdicts) { + if (!v.passed) evalPassed = false; + } + } else { + evalPassed = false; // no verdicts = not passed + } + } + } + } catch (e) { + console.log('Error reading eval results JSON:', e.message); + } + } + + // ── LLM evaluation section ──────────────────────────── + if (!evalRan && !isContributor) { + lines.push('### ⏭️ LLM Evaluation: Skipped'); + lines.push(''); + lines.push('> 💡 LLM evaluation was not run for this external PR.'); + lines.push('> A repository contributor can post `/evaluate-skills` on this PR to trigger full evaluation.'); + lines.push(''); + } else if (!hasEntries) { + lines.push('### ⏭️ LLM Evaluation: Skipped'); + lines.push('_No changed skills with eval tests found._'); + lines.push(''); + } else if (hasResults) { + // Use actual results from JSON 
to determine status + if (evalPassed) { + lines.push('### ✅ LLM Evaluation Passed'); + } else { + lines.push('### ❌ LLM Evaluation Failed'); + } + const passedCount = allVerdicts.filter(v => v.passed).length; + lines.push(`${passedCount}/${allVerdicts.length} skill(s) passed validation`); + lines.push(''); + + // ── Build results table ───────────────────────────── + if (allVerdicts.length > 0) { + lines.push('| Skill | Scenario | Baseline | Skilled | Verdict |'); + lines.push('|-------|----------|----------|---------|---------|'); + + let fnIndex = 0; + for (const verdict of allVerdicts) { + const scenarios = verdict.scenarios || []; + for (const sc of scenarios) { + const baseScore = sc.baseline?.judgeResult?.overallScore; + const isolatedScore = sc.skilledIsolated?.judgeResult?.overallScore; + const pluginScore = sc.skilledPlugin?.judgeResult?.overallScore; + + // Format scores + const baseStr = baseScore != null ? `${baseScore.toFixed(1)}/5` : '—'; + + // Pick the best skilled score (isolated or plugin) + let skilledStr; + if (isolatedScore != null && pluginScore != null) { + skilledStr = `${isolatedScore.toFixed(1)}/5 (iso) · ${pluginScore.toFixed(1)}/5 (plug)`; + } else if (isolatedScore != null) { + skilledStr = `${isolatedScore.toFixed(1)}/5`; + } else if (pluginScore != null) { + skilledStr = `${pluginScore.toFixed(1)}/5`; + } else { + skilledStr = '—'; + } + + // Timeout indicator + const timeoutFlag = sc.timedOut ? ' ⏳' : ''; + + // Verdict icon — per-scenario: improvement >= 0 means not regressed + const improvement = sc.improvementScore || 0; + const scenarioIcon = improvement >= 0 ? 
'✅' : '⚠️'; + + // Footnote for high variance or timeout + let footRef = ''; + if (sc.highVariance || sc.timedOut) { + fnIndex++; + const parts = []; + if (sc.highVariance) parts.push(`High run-to-run variance (CV=${(sc.varianceCV || 0).toFixed(2)})`); + if (sc.timedOut) parts.push(`Timeout at ${sc.timeoutSeconds || '?'}s`); + footRef = ` [${fnIndex}]`; + footnotes.push(`[${fnIndex}] ${parts.join('. ')}`); + } + + const safeSkillName = (verdict.skillName || '').replace(/\|/g, '\\|'); + const safeScenarioName = (sc.scenarioName || '').replace(/\|/g, '\\|'); + lines.push(`| ${safeSkillName} | ${safeScenarioName} | ${baseStr}${timeoutFlag} | ${skilledStr}${timeoutFlag} | ${scenarioIcon}${footRef} |`); + } + } + lines.push(''); + + // Overall verdict line per skill + for (const verdict of allVerdicts) { + const icon = verdict.passed ? '✅' : '❌'; + const reason = (verdict.reason || '').replace(/\|/g, '\\|'); + const safeSkillNameSummary = (verdict.skillName || '').replace(/\|/g, '\\|'); + lines.push(`${icon} **${safeSkillNameSummary}**: ${reason}`); + lines.push(''); + } + + // Footnotes + if (footnotes.length > 0) { + for (const fn of footnotes) { + lines.push(fn); + } + lines.push(''); + } + + // Timeout warning + const hasTimeout = allVerdicts.some(v => + (v.scenarios || []).some(s => s.timedOut) + ); + if (hasTimeout) { + lines.push('> ⏳ **timeout** — run(s) hit the scenario timeout limit; scoring may be impacted'); + lines.push(''); + } + } + } else if (evalResult === 'success') { + lines.push('### ✅ LLM Evaluation Passed'); + lines.push(''); + } else if (evalResult === 'failure') { + lines.push('### ❌ LLM Evaluation Failed'); + lines.push(''); + } else if (evalResult === 'skipped') { + lines.push('### ⏭️ LLM Evaluation: Skipped'); + lines.push(''); + } else { + lines.push(`### ⚠️ LLM Evaluation: ${evalResult}`); + lines.push(''); + } + + // Detailed judge reports in collapsible sections + if (fs.existsSync('eval-results')) { + try { + const resultDirs = 
fs.readdirSync('eval-results').filter(d => + fs.statSync(path.join('eval-results', d)).isDirectory() + ); + + for (const dir of resultDirs) { + const skillName = dir.replace('skill-eval-results-', ''); + const dirPath = path.join('eval-results', dir); + const allFiles = []; + function walkDir2(d) { + for (const f of fs.readdirSync(d)) { + const fp = path.join(d, f); + if (fs.statSync(fp).isDirectory()) walkDir2(fp); + else allFiles.push(path.relative(dirPath, fp)); + } + } + walkDir2(dirPath); + + // Include per-scenario judge reports (not summary.md which duplicates the table) + const mdFiles = allFiles.filter(f => + f.endsWith('.md') && !f.endsWith('summary.md') + ); + for (const mdFile of mdFiles) { + const mdContent = fs.readFileSync( + path.join(dirPath, mdFile), 'utf8' + ).trim(); + if (mdContent.length > 0) { + const scenarioName = path.basename(mdFile, '.md'); + lines.push(`
`); + lines.push(`📊 ${skillName} / ${scenarioName}`); + lines.push(''); + lines.push(mdContent.replace(/```/g, '` ` `').replace(/<\/details>/gi, '</details>')); + lines.push(''); + lines.push('
'); + lines.push(''); + } + } + } + } catch (e) { + console.log('Error reading eval result details:', e.message); + } + } + + // ── Investigation prompt for failures ───────────────── + // When any evaluated skill failed, build a copy-paste prompt + // that tells the user how to download artifacts and investigate + // with their AI coding agent (same pattern as dotnet/skills). + let investigatePrompt = ''; + if (hasResults && !evalPassed) { + const runId = context.runId; + const repo = `${context.repo.owner}/${context.repo.repo}`; + investigatePrompt = [ + '', + '> **To investigate failures**, paste this to your AI coding agent:', + '>', + `> _For PR #${prNumber} in ${repo}, download eval artifacts with ` + + `\`gh run download ${runId} --repo ${repo} --pattern "skill-eval-results-*" --dir ./eval-results\`, ` + + `then fetch https://raw.githubusercontent.com/dotnet/skills/main/eng/skill-validator/src/docs/InvestigatingResults.md ` + + `and follow it to analyze the results.json files. Diagnose each failure, suggest fixes to the eval.yaml ` + + `and skill content, and tell me what to fix first._`, + ].join('\n'); + } + + // ── Pipeline link (styled like dotnet/skills) ───────── + lines.push(`[🔍 Full results and investigation steps](${runUrl})`); + + const body = lines.join('\n'); + + // ── Write step summary with investigation prompt ────── + const summaryPath = process.env.GITHUB_STEP_SUMMARY; + if (summaryPath) { + const summaryLines = ['## Skill Validation Results', '']; + summaryLines.push(body.replace(marker, '').trim()); + if (investigatePrompt) { + summaryLines.push(investigatePrompt); + } + fs.appendFileSync(summaryPath, summaryLines.join('\n') + '\n'); + } + + // Upsert comment (find existing with marker, update or create) + // Paginate to handle PRs with 100+ comments + const comments = await github.paginate( + github.rest.issues.listComments, + { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + per_page: 100, + } + ); + + 
const existing = comments.find(c => c.body && c.body.includes(marker));
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+              console.log(`Updated existing comment ${existing.id}`);
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body,
+              });
+              console.log('Created new PR comment');
+            }
+
+            // Save eval pass/fail for downstream jobs
+            // ("true" / "false" from the parsed results, "na" when none were found)
+            const outputPath = process.env.GITHUB_OUTPUT;
+            if (outputPath) {
+              fs.appendFileSync(outputPath, `eval_passed=${hasResults ? evalPassed : 'na'}\n`);
+            }
+
+  # ==========================================================================
+  # REPORT STATUS
+  # Post final commit status on PR head SHA.
+  # ==========================================================================
+  report-status:
+    name: Report status
+    needs: [pr-gate, slash-gate, static-check, discover-eval, evaluate, comment]
+    # Run even when upstream jobs failed or were skipped, as long as one of the
+    # two gates let this run through.
+    if: >-
+      always() && !cancelled() && (
+        needs.pr-gate.result == 'success' ||
+        needs.slash-gate.result == 'success'
+      )
+    runs-on: ubuntu-latest
+    permissions:
+      statuses: write
+      checks: write
+      issues: write
+    steps:
+      - name: Set commit status
+        # Workflow expressions are handed to the script through env instead of
+        # being interpolated into the shell source, so their values are always
+        # treated as data and never as script text.
+        env:
+          GH_TOKEN: ${{ github.token }}
+          HEAD_SHA: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
+          STATIC: ${{ needs.static-check.result }}
+          EVAL: ${{ needs.evaluate.result }}
+          DISCOVER: ${{ needs.discover-eval.result }}
+          HAS_ENTRIES: ${{ needs.discover-eval.outputs.has_entries }}
+          EVAL_PASSED: ${{ needs.comment.outputs.eval_passed }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          if [ -z "$HEAD_SHA" ]; then
+            echo "No head SHA (workflow_dispatch?) — skipping status"
+            exit 0
+          fi
+
+          # Decision order:
+          #   1. static check must have succeeded
+          #   2. no eval discovery / no eval entries      -> static-only success
+          #   3. EVAL_PASSED (verdict from the comment job) decides when set
+          #   4. otherwise fall back to the evaluate job result
+          # NOTE(review): a *failed* discover-eval job also lands in the
+          # "static only" success branch — confirm that is intended.
+          if [[ "$STATIC" == "success" ]]; then
+            if [[ "$DISCOVER" != "success" || "$HAS_ENTRIES" != "true" ]]; then
+              STATE="success"
+              DESC="Skill validation passed (static only)"
+            elif [[ "$EVAL_PASSED" == "true" ]]; then
+              STATE="success"
+              DESC="Skill validation passed"
+            elif [[ "$EVAL_PASSED" == "false" ]]; then
+              STATE="failure"
+              DESC="LLM evaluation failed"
+            elif [[ "$EVAL" == "failure" ]]; then
+              STATE="failure"
+              DESC="LLM evaluation failed"
+            else
+              STATE="error"
+              DESC="Evaluation incomplete ($EVAL)"
+            fi
+          elif [[ "$STATIC" == "failure" ]]; then
+            STATE="failure"
+            DESC="Static validation failed"
+          else
+            STATE="error"
+            DESC="Validation incomplete (static: $STATIC)"
+          fi
+
+          # Post commit status (appears in PR status checks)
+          gh api "repos/${GITHUB_REPOSITORY}/statuses/${HEAD_SHA}" \
+            -f state="$STATE" \
+            -f context="skill-validation" \
+            -f description="$DESC" \
+            -f target_url="$RUN_URL"
+
+      # Create a Check Run on the PR head SHA so it shows in the PR Checks tab.
+      # pull_request_target runs are associated with the base branch, not the PR,
+      # so without this the workflow link won't appear on the PR.
+      - name: Create check run
+        if: always()
+        uses: actions/github-script@v7
+        # Workflow expressions are passed through env and read from process.env
+        # so their values are never spliced into the JavaScript source.
+        env:
+          HEAD_SHA: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
+          STATIC_RESULT: ${{ needs.static-check.result }}
+          EVAL_PASSED: ${{ needs.comment.outputs.eval_passed }}
+          EVAL_RESULT: ${{ needs.evaluate.result }}
+          HAS_ENTRIES: ${{ needs.discover-eval.outputs.has_entries }}
+        with:
+          script: |
+            const headSha = process.env.HEAD_SHA;
+            if (!headSha) return;
+
+            const staticResult = process.env.STATIC_RESULT;
+            const evalPassed = process.env.EVAL_PASSED;
+            const evalResult = process.env.EVAL_RESULT;
+            const hasEntries = process.env.HAS_ENTRIES === 'true';
+
+            // Same decision order as the "Set commit status" step of this job:
+            // static check first, then the parsed verdict (eval_passed), then
+            // the evaluate job result as a fallback.
+            let conclusion, title;
+            if (staticResult === 'failure') {
+              conclusion = 'failure';
+              title = 'Static validation failed';
+            } else if (staticResult !== 'success') {
+              // skipped / cancelled / empty: still not a pass, but do not claim
+              // that the static check itself failed.
+              conclusion = 'failure';
+              title = `Validation incomplete (static: ${staticResult})`;
+            } else if (!hasEntries) {
+              conclusion = 'success';
+              title = 'Skill validation passed (static only)';
+            } else if (evalPassed === 'true') {
+              conclusion = 'success';
+              title = 'Skill validation passed';
+            } else if (evalPassed === 'false') {
+              conclusion = 'failure';
+              title = 'LLM evaluation failed';
+            } else if (evalResult === 'failure') {
+              conclusion = 'failure';
+              title = 'LLM evaluation failed';
+            } else {
+              conclusion = 'neutral';
+              title = `Evaluation incomplete (${evalResult})`;
+            }
+
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+
+            await github.rest.checks.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: 'Skill Validation',
+              head_sha: headSha,
+              status: 'completed',
+              conclusion,
+              output: {
+                title,
+                summary: `[View full results](${runUrl})`,
+              },
+              details_url: runUrl,
+            });
+            console.log(`Created check run: ${conclusion} - ${title}`);
+
+      # Remove eyes reaction (slash command only)
+      - name: Remove reaction
+        # always(): without a status function an implicit success() is applied,
+        # which would leave the reaction behind whenever an earlier step failed.
+        if: always() && needs.slash-gate.result == 'success'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          COMMENT_ID: ${{ github.event.comment.id }}
+        run: |
+          # Best-effort cleanup: look up the bot's own "eyes" reaction on the
+          # triggering comment; a failed lookup yields an empty id.
+          REACTION_ID=$(gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}/reactions" \
+            --jq '.[] | select(.content == "eyes" and .user.login == "github-actions[bot]") | .id' \
+            | head -1 || echo "")
+          if [[ -n "$REACTION_ID" && "$REACTION_ID" != "null" ]]; then
+            gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}/reactions/${REACTION_ID}" \
+              -X DELETE || true
+          fi