From 06cf7792adc46bda62e2bb44e238757d7151ec9b Mon Sep 17 00:00:00 2001 From: Jakub Florkowski Date: Wed, 28 Jan 2026 15:09:31 +0100 Subject: [PATCH 1/3] Add PR label management to test verification skill Enhances the verification process by automatically adding or removing PR labels (`s/ai-reproduction-confirmed` and `s/ai-reproduction-failed`) based on test outcomes. Updates documentation to describe label behavior and integrates label updates into both quick and full verification modes. --- .../verify-tests-fail-without-fix/SKILL.md | 21 +++++++- .../scripts/verify-tests-fail.ps1 | 48 +++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/.github/skills/verify-tests-fail-without-fix/SKILL.md b/.github/skills/verify-tests-fail-without-fix/SKILL.md index c20b848c406b..ba3df1d1aaa6 100644 --- a/.github/skills/verify-tests-fail-without-fix/SKILL.md +++ b/.github/skills/verify-tests-fail-without-fix/SKILL.md @@ -81,7 +81,8 @@ The script auto-detects which mode to use based on whether fix files are present 1. Fetches base branch from origin (if available) 2. Auto-detects test classes from changed test files 3. Runs tests (should FAIL to prove they catch the bug) -4. Reports result +4. **Updates PR labels** based on result +5. Reports result **Full Verification Mode (fix files detected):** 1. Fetches base branch from origin to ensure accurate diff @@ -94,7 +95,23 @@ The script auto-detects which mode to use based on whether fix files are present 8. **Generates markdown reports**: - `CustomAgentLogsTmp/TestValidation/verification-report.md` - Full detailed report - `CustomAgentLogsTmp/PRState/verification-report.md` - Gate section for PR agent -9. Reports result +9. **Updates PR labels** based on result +10. Reports result + +## PR Labels + +The skill automatically manages two labels on the PR to indicate verification status: + +| Label | Color | When Applied | +|-------|-------|--------------| +| `s/ai-reproduction-confirmed` | 🟢 Green (#2E7D32) | Tests correctly FAIL without fix (AI verified tests catch the bug) | +| `s/ai-reproduction-failed` | 🟠 Orange (#E65100) | Tests PASS without fix (AI verified tests don't catch the bug) | + +**Behavior:** +- When verification passes, adds `s/ai-reproduction-confirmed` and removes `s/ai-reproduction-failed` if present +- When verification fails, adds `s/ai-reproduction-failed` and removes `s/ai-reproduction-confirmed` if present +- If a PR is re-verified after fixing tests, labels are updated accordingly +- No label = AI hasn't verified tests yet ## Output Files diff --git a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 index c1e8dce9f0a7..71b584ee80b4 100644 --- a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 +++ b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 @@ -124,6 +124,50 @@ $BaselineScript = Join-Path $RepoRoot ".github/scripts/EstablishBrokenBaseline.p # Import Test-IsTestFile and Find-MergeBase from shared script . $BaselineScript +# ============================================================ +# Label management for verification results +# ============================================================ +$LabelConfirmed = "s/ai-reproduction-confirmed" +$LabelFailed = "s/ai-reproduction-failed" + +function Update-VerificationLabels { + param( + [Parameter(Mandatory = $true)] + [bool]$ReproductionConfirmed, + + [Parameter(Mandatory = $false)] + [string]$PR = $PRNumber + ) + + if ($PR -eq "unknown" -or -not $PR) { + Write-Host "⚠️ Cannot update labels: PR number not available" -ForegroundColor Yellow + return + } + + $labelToAdd = if ($ReproductionConfirmed) { $LabelConfirmed } else { $LabelFailed } + $labelToRemove = if ($ReproductionConfirmed) { $LabelFailed } else { $LabelConfirmed } + + Write-Host "" + Write-Host "🏷️ Updating verification labels on PR #$PR..." -ForegroundColor Cyan + + # Remove the opposite label if it exists + $existingLabels = gh pr view $PR --json labels --jq '.labels[].name' 2>$null + if ($existingLabels -contains $labelToRemove) { + Write-Host " Removing: $labelToRemove" -ForegroundColor Yellow + gh pr edit $PR --remove-label $labelToRemove 2>$null + } + + # Add the appropriate label + Write-Host " Adding: $labelToAdd" -ForegroundColor Green + gh pr edit $PR --add-label $labelToAdd 2>$null + + if ($LASTEXITCODE -eq 0) { + Write-Host "✅ Labels updated successfully" -ForegroundColor Green + } else { + Write-Host "⚠️ Failed to update labels (may not have permission)" -ForegroundColor Yellow + } +} + # ============================================================ # Auto-detect test filter from changed files # ============================================================ @@ -392,6 +436,7 @@ if ($DetectedFixFiles.Count -eq 0) { Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Green Write-Host "" Write-Host "Failed tests: $($testResult.FailCount)" -ForegroundColor Yellow + Update-VerificationLabels -ReproductionConfirmed $true exit 0 } else { # Tests PASSED - this is bad! @@ -412,6 +457,7 @@ if ($DetectedFixFiles.Count -eq 0) { Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Red Write-Host "" Write-Host "Passed tests: $($testResult.PassCount)" -ForegroundColor Yellow + Update-VerificationLabels -ReproductionConfirmed $false exit 1 } } @@ -806,6 +852,7 @@ if ($verificationPassed) { Write-Host "║ - FAIL without fix (as expected) ║" -ForegroundColor Green Write-Host "║ - PASS with fix (as expected) ║" -ForegroundColor Green Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Green + Update-VerificationLabels -ReproductionConfirmed $true exit 0 } else { Write-Host "" @@ -827,5 +874,6 @@ if ($verificationPassed) { Write-Host "║ 3. The issue was already fixed in base branch ║" -ForegroundColor Red Write-Host "║ 4. Build caching - try clean rebuild ║" -ForegroundColor Red Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Red + Update-VerificationLabels -ReproductionConfirmed $false exit 1 } From 9d0fcce254bff71a3e5daa7ec4ce8434b3b597d4 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Wed, 28 Jan 2026 10:58:46 -0600 Subject: [PATCH 2/3] - fix up script a bit --- .../scripts/verify-tests-fail.ps1 | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 index 71b584ee80b4..f8e75eec8a84 100644 --- a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 +++ b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 @@ -98,14 +98,34 @@ if (-not $PRNumber) { $PRNumber = $matches[1] Write-Host "✅ Auto-detected PR #$PRNumber from branch name" -ForegroundColor Green } else { - # Try gh cli + $foundPR = $false + # Try gh cli - first try 'gh pr view' for current branch try { $prInfo = gh pr view --json number 2>$null | ConvertFrom-Json - if ($prInfo.number) { + if ($prInfo -and $prInfo.number) { $PRNumber = $prInfo.number - Write-Host "✅ Auto-detected PR #$PRNumber from gh cli" -ForegroundColor Green + $foundPR = $true + Write-Host "✅ Auto-detected PR #$PRNumber from gh cli (pr view)" -ForegroundColor Green } } catch { + # gh pr view failed, will try fallback + } + + # Fallback: search for PRs with this branch as head (works across forks) + if (-not $foundPR) { + try { + $prList = gh pr list --head $currentBranch --json number --limit 1 2>$null | ConvertFrom-Json + if ($prList -and $prList.Count -gt 0 -and $prList[0].number) { + $PRNumber = $prList[0].number + $foundPR = $true + Write-Host "✅ Auto-detected PR #$PRNumber from gh cli (pr list --head)" -ForegroundColor Green + } + } catch { + # gh pr list also failed + } + } + + if (-not $foundPR) { Write-Host "⚠️ Could not auto-detect PR number - using 'unknown' folder" -ForegroundColor Yellow $PRNumber = "unknown" } @@ -150,21 +170,21 @@ function Update-VerificationLabels { Write-Host "" Write-Host "🏷️ Updating verification labels on PR #$PR..." -ForegroundColor Cyan - # Remove the opposite label if it exists + # Remove the opposite label if it exists (using REST API to avoid GraphQL deprecation issues) $existingLabels = gh pr view $PR --json labels --jq '.labels[].name' 2>$null if ($existingLabels -contains $labelToRemove) { Write-Host " Removing: $labelToRemove" -ForegroundColor Yellow - gh pr edit $PR --remove-label $labelToRemove 2>$null + gh api "repos/dotnet/maui/issues/$PR/labels/$labelToRemove" --method DELETE 2>$null | Out-Null } - # Add the appropriate label + # Add the appropriate label (using REST API to avoid GraphQL deprecation issues) Write-Host " Adding: $labelToAdd" -ForegroundColor Green - gh pr edit $PR --add-label $labelToAdd 2>$null + $result = gh api "repos/dotnet/maui/issues/$PR/labels" --method POST -f "labels[]=$labelToAdd" 2>&1 if ($LASTEXITCODE -eq 0) { Write-Host "✅ Labels updated successfully" -ForegroundColor Green } else { - Write-Host "⚠️ Failed to update labels (may not have permission)" -ForegroundColor Yellow + Write-Host "⚠️ Failed to update labels: $result" -ForegroundColor Yellow } } @@ -756,9 +776,10 @@ Write-Log "==========================================" foreach ($file in $RevertableFiles) { Write-Log " Reverting: $file" - git checkout $MergeBase -- $file 2>&1 | Out-Null + $gitOutput = git checkout $MergeBase -- $file 2>&1 if ($LASTEXITCODE -ne 0) { Write-Log " ERROR: Failed to revert $file from $MergeBase" + Write-Log " Git output: $gitOutput" exit 1 } } @@ -785,9 +806,10 @@ Write-Log "==========================================" foreach ($file in $RevertableFiles) { Write-Log " Restoring: $file" - git checkout HEAD -- $file 2>&1 | Out-Null + $gitOutput = git checkout HEAD -- $file 2>&1 if ($LASTEXITCODE -ne 0) { Write-Log " ERROR: Failed to restore $file from HEAD" + Write-Log " Git output: $gitOutput" exit 1 } } From 005d3c07be084ca595be125d46f828b43f2a0c28 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Wed, 28 Jan 2026 21:33:00 -0600 Subject: [PATCH 3/3] Fix: Check LASTEXITCODE for both label operations Address Copilot review feedback - track success of both DELETE and POST operations separately. If the DELETE fails but POST succeeds, report partial failure instead of full success to avoid misleading logs and potential conflicting label state. --- .../scripts/verify-tests-fail.ps1 | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 index f8e75eec8a84..f292c83f84fb 100644 --- a/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 +++ b/.github/skills/verify-tests-fail-without-fix/scripts/verify-tests-fail.ps1 @@ -170,19 +170,29 @@ function Update-VerificationLabels { Write-Host "" Write-Host "🏷️ Updating verification labels on PR #$PR..." -ForegroundColor Cyan + # Track success for both operations + $removeSuccess = $true + # Remove the opposite label if it exists (using REST API to avoid GraphQL deprecation issues) $existingLabels = gh pr view $PR --json labels --jq '.labels[].name' 2>$null if ($existingLabels -contains $labelToRemove) { Write-Host " Removing: $labelToRemove" -ForegroundColor Yellow gh api "repos/dotnet/maui/issues/$PR/labels/$labelToRemove" --method DELETE 2>$null | Out-Null + if ($LASTEXITCODE -ne 0) { + $removeSuccess = $false + Write-Host " ⚠️ Failed to remove label: $labelToRemove" -ForegroundColor Yellow + } } # Add the appropriate label (using REST API to avoid GraphQL deprecation issues) Write-Host " Adding: $labelToAdd" -ForegroundColor Green $result = gh api "repos/dotnet/maui/issues/$PR/labels" --method POST -f "labels[]=$labelToAdd" 2>&1 + $addSuccess = $LASTEXITCODE -eq 0 - if ($LASTEXITCODE -eq 0) { + if ($addSuccess -and $removeSuccess) { Write-Host "✅ Labels updated successfully" -ForegroundColor Green + } elseif ($addSuccess) { + Write-Host "⚠️ Label added but failed to remove old label" -ForegroundColor Yellow } else { Write-Host "⚠️ Failed to update labels: $result" -ForegroundColor Yellow }