diff --git a/.github/agents/maui-expert-reviewer.md b/.github/agents/maui-expert-reviewer.md index 56efe5638df5..eee9bb526dbb 100644 --- a/.github/agents/maui-expert-reviewer.md +++ b/.github/agents/maui-expert-reviewer.md @@ -111,6 +111,7 @@ Every bug fix needs a regression test. Modified code must be checked against git - CHECK: Test covers the specific scenario from the issue report, not a generic case - CHECK: Shared code changes are tested on all affected platforms - CHECK: Previously-fixed issue numbers are cross-referenced when modifying the same code area +- CHECK: If `regression-check/risks.json` exists and contains `REVERT` entries, list the affected fix PRs/issues and require author acknowledgment that the reverted fix is intentional. The regression cross-reference script (`Find-RegressionRisks.ps1`) detects when a PR deletes lines that were previously added by a labeled bug-fix PR. - CHECK: UI tests run on all applicable platforms unless there is a specific technical limitation - CHECK: Snapshot baselines updated across all platforms when changing background color, font, or layout - CHECK: Screenshot size matches capture method — a size mismatch means the capture changed, not the rendering diff --git a/.github/scripts/BuildAndRunHostApp.ps1 b/.github/scripts/BuildAndRunHostApp.ps1 index 76959772f48e..8795fc0a8f68 100644 --- a/.github/scripts/BuildAndRunHostApp.ps1 +++ b/.github/scripts/BuildAndRunHostApp.ps1 @@ -53,7 +53,7 @@ param( [ValidateSet("android", "ios", "catalyst", "maccatalyst", "windows")] [string]$Platform, - [Parameter(Mandatory = $true, ParameterSetName = "TestFilter")] + [Parameter(Mandatory = $false, ParameterSetName = "TestFilter")] [string]$TestFilter, [Parameter(Mandatory = $true, ParameterSetName = "Category")] @@ -219,13 +219,20 @@ Write-Success "Test project: $TestProject" #region Run Tests -# Determine the filter to use +# Determine the filter to use. 
+# NOTE: The CI pipeline `maui-pr-uitests` (definition 313) uses `TestCategory=` +# (see eng/pipelines/common/ui-tests-steps.yml lines 116-164). NUnit accepts +# both `Category=` and `TestCategory=` but Cake's RunTestWithLocalDotNet uses +# `TestCategory=` so we mirror that here for byte-for-byte parity with CI. if ($Category) { - $effectiveFilter = "Category=$Category" + $effectiveFilter = "TestCategory=$Category" Write-Step "Running UI tests with category: $Category" -} else { +} elseif ($TestFilter) { $effectiveFilter = $TestFilter Write-Step "Running UI tests with filter: $TestFilter" +} else { + $effectiveFilter = $null + Write-Step "Running ALL UI tests (no filter)" } # Clear device logs before test @@ -233,27 +240,30 @@ if ($Platform -eq "android") { Write-Info "Clearing Android logcat buffer before test..." & adb -s $DeviceUdid logcat -c - # Dismiss any ANR dialogs that may have appeared during build/deploy. - # The emulator can sit idle during long builds, causing SystemUI ANR. - Write-Info "Dismissing any system dialogs before test..." + # Wait for Android settings service to be available. + Write-Info "Waiting for Android settings service..." + $settingsReady = $false + for ($i = 0; $i -lt 30; $i++) { + $settingsCheck = & adb -s $DeviceUdid shell settings get global device_name 2>&1 + if ($settingsCheck -and $settingsCheck -notmatch "Can't find service|error") { + $settingsReady = $true + Write-Success "Settings service ready (device_name=$settingsCheck)" + break + } + Write-Info " Settings service not ready yet (attempt $($i+1)/30)..." + Start-Sleep -Seconds 5 + } + if (-not $settingsReady) { + Write-Warn "Settings service may not be ready — tests might fail" + } + + # Do NOT force-stop or restart the app here. Appium's UiAutomator2 + # driver handles app lifecycle via appPackage/appActivity capabilities. + # Manual restart causes double-stop issues and the app ends up in a + # bad state. Just dismiss any system dialogs and let Appium handle it. 
& adb -s $DeviceUdid shell am broadcast -a android.intent.action.CLOSE_SYSTEM_DIALOGS 2>$null - & adb -s $DeviceUdid shell input keyevent KEYCODE_ENTER 2>$null - & adb -s $DeviceUdid shell input keyevent KEYCODE_BACK 2>$null - Start-Sleep -Seconds 1 & adb -s $DeviceUdid shell input keyevent KEYCODE_WAKEUP 2>$null - & adb -s $DeviceUdid shell input keyevent KEYCODE_MENU 2>$null Start-Sleep -Seconds 1 - - # Check for lingering ANR dialogs via window dump - $windowDump = & adb -s $DeviceUdid shell dumpsys window 2>$null | Select-String "Application Not Responding|ANR" - if ($windowDump) { - Write-Warn "ANR dialog detected — force-dismissing..." - & adb -s $DeviceUdid shell input keyevent KEYCODE_HOME 2>$null - Start-Sleep -Seconds 2 - & adb -s $DeviceUdid shell am broadcast -a android.intent.action.CLOSE_SYSTEM_DIALOGS 2>$null - & adb -s $DeviceUdid shell input keyevent KEYCODE_BACK 2>$null - Start-Sleep -Seconds 1 - } } # Capture test start time for iOS logs @@ -294,7 +304,8 @@ if ($Platform -eq "catalyst") { $env:MAUI_LOG_FILE = $deviceLogFile } -Write-Info "Executing: dotnet test --filter `"$effectiveFilter`"" +$filterDisplay = if ($effectiveFilter) { "--filter `"$effectiveFilter`"" } else { "(no filter — all tests)" } +Write-Info "Executing: dotnet test $filterDisplay" Write-Host "" # Set environment variables for the test @@ -306,9 +317,47 @@ $appiumLogFile = Join-Path $HostAppLogsDir "appium.log" $env:APPIUM_LOG_FILE = $appiumLogFile Write-Info "Set APPIUM_LOG_FILE: $appiumLogFile (screenshots will be saved here)" +# ── TRX setup (mirrors CI: eng/cake/dotnet.cake `RunTestWithLocalDotNet`) ── +# CI writes one trx per test run via: +# --logger "trx;LogFileName=.trx" +# --logger "console;verbosity=normal" +# --results-directory +# /p:VStestUseMSBuildOutput=false +# We reproduce that here so STEP 3's renderer can parse authoritative +# pass/fail counts from the TRX (instead of scraping console output, which is +# fragile when many tests run and lines get interleaved 
or wrapped). +$trxResultsDir = Join-Path $HostAppLogsDir "TestResults" +if (-not (Test-Path $trxResultsDir)) { + New-Item -ItemType Directory -Path $trxResultsDir -Force | Out-Null +} +# Sanitize the trx file name. NUnit/MSTest reject some characters. We keep +# alpha-numeric, dash, underscore and dot — same set Cake's +# SanitizeTestResultsFilename uses. +$trxBaseName = if ($Category) { "$Category-$Platform" } + elseif ($TestFilter) { ($TestFilter -replace '[^A-Za-z0-9._-]', '_') } + else { "ALL-$Platform" } +$trxBaseName = $trxBaseName -replace '[^A-Za-z0-9._-]', '_' +$trxFileName = "$trxBaseName.trx" +$trxFilePath = Join-Path $trxResultsDir $trxFileName +# Pre-clean stale TRX so we never read a previous run's results +if (Test-Path $trxFilePath) { Remove-Item $trxFilePath -Force -ErrorAction SilentlyContinue } + +Write-Info "TRX file will be written to: $trxFilePath" + try { - # Run dotnet test and capture output - $testOutput = & dotnet test $TestProject --filter $effectiveFilter --logger "console;verbosity=detailed" 2>&1 + # Run dotnet test using the SAME loggers and arguments CI uses in + # `RunTestWithLocalDotNet` (eng/cake/dotnet.cake line 943-981). 
+ $trxRunStart = Get-Date + $testArgs = @($TestProject, + "--logger", "trx;LogFileName=$trxFileName", + "--logger", "console;verbosity=normal", + "--results-directory", $trxResultsDir, + "/p:VStestUseMSBuildOutput=false") + if ($effectiveFilter) { + $testArgs = @($TestProject, "--filter", $effectiveFilter) + $testArgs[1..($testArgs.Length-1)] + } + Write-Info "Actual dotnet test args: $($testArgs -join ' ')" + $testOutput = & dotnet test @testArgs 2>&1 # Save test output to file $testOutput | Out-File -FilePath $testOutputFile -Encoding UTF8 @@ -316,9 +365,141 @@ try { # Output test results to the output stream so callers can capture them # (Write-Host goes to the Information stream which is not captured by 2>&1) $testOutput | ForEach-Object { Write-Output $_ } - + + # Surface the TRX path on a marker line so callers (Invoke-UITestWithRetry + # and Review-PR.ps1) can locate the authoritative results file regardless + # of where the working directory was when this script ran. + if (Test-Path $trxFilePath) { + Write-Output ">>> TRX_RESULT_FILE: $trxFilePath" + } else { + # dotnet test may have written the TRX with a slightly different name + # (e.g. LogFileName argument stripped on Windows, or it injected a + # timestamp). Fall back to scanning the results dir for any .trx + # written AFTER this run started — never pick up a stale TRX from a + # previous category that shares the same results directory. + $latestTrx = Get-ChildItem -Path $trxResultsDir -Filter "*.trx" -ErrorAction SilentlyContinue | + Where-Object { $_.LastWriteTime -ge $trxRunStart } | + Sort-Object LastWriteTime -Descending | Select-Object -First 1 + if ($latestTrx) { + Write-Output ">>> TRX_RESULT_FILE: $($latestTrx.FullName)" + } + } + $testExitCode = $LASTEXITCODE + # ── Per-test retry for flaky failures (Android emulator instability) ── + # Parse the TRX for failed tests and re-run them once. This catches + # emulator-induced timeouts and transient ADB failures that aren't + # real test bugs. 
Only retry on Android where flake rate is ~5%. + if ($testExitCode -ne 0 -and $Platform -eq 'android' -and (Test-Path $trxFilePath)) { + . "$PSScriptRoot/shared/Get-TrxResults.ps1" + $firstRun = Get-TrxResults -TrxPath $trxFilePath + if ($firstRun -and [int]$firstRun.Failed -gt 0 -and [int]$firstRun.Passed -gt 0) { + $failedNames = @($firstRun.Results | Where-Object { $_.status -eq 'Failed' } | ForEach-Object { $_.name }) + Write-Host "" + Write-Warn "🔄 Retrying $($failedNames.Count) failed test(s) on Android..." + + # Build a FullyQualifiedName filter for just the failed tests. + # Strip parameter signatures (e.g. TestMethod(arg: "val")) because + # VSTest filter grammar treats ( ) | & ! as operators. Using the + # bare method name with ~ (contains) is safe and sufficient. + $safeNames = @($failedNames | ForEach-Object { $_ -replace '\(.*$', '' } | Select-Object -Unique) + $retryFilter = ($safeNames | ForEach-Object { "FullyQualifiedName~$_" }) -join ' | ' + $retryTrx = Join-Path $trxResultsDir "retry-$trxBaseName.trx" + Remove-Item $retryTrx -Force -ErrorAction SilentlyContinue + + $retryArgs = @($TestProject, "--filter", $retryFilter, + "--logger", "trx;LogFileName=retry-$trxFileName", + "--logger", "console;verbosity=normal", + "--results-directory", $trxResultsDir, + "/p:VStestUseMSBuildOutput=false", "--no-build") + Write-Info "Retry args: dotnet test --filter '$retryFilter' --no-build" + $retryOutput = & dotnet test @retryArgs 2>&1 + $retryOutput | ForEach-Object { Write-Output $_ } + $retryExitCode = $LASTEXITCODE + + # Parse retry TRX and count how many passed on retry + $retryTrxPath = Join-Path $trxResultsDir "retry-$trxFileName" + if (Test-Path $retryTrxPath) { + $retryResults = Get-TrxResults -TrxPath $retryTrxPath + if ($retryResults) { + $retryPassed = @($retryResults.Results | Where-Object { $_.status -eq 'Passed' }).Count + $retryFailed = @($retryResults.Results | Where-Object { $_.status -eq 'Failed' }).Count + Write-Host " Retry results: 
$retryPassed passed, $retryFailed failed (of $($failedNames.Count) retried)" -ForegroundColor Cyan + + if ($retryFailed -eq 0) { + Write-Success "All $retryPassed flaky test(s) passed on retry!" + $testExitCode = 0 + } else { + Write-Warn "$retryFailed test(s) still failing after retry (real failures)" + } + # Merge retry results into the original TRX: replace only the + # retried test entries in the original with their retry outcomes, + # preserving all tests that passed on the first run. This avoids + # the prior bug where Copy-Item overwrote the full TRX with the + # retry-only TRX, losing the first-run passing tests entirely. + try { + [xml]$origXml = Get-Content -Path $trxFilePath -Raw -Encoding UTF8 + [xml]$retryXml = Get-Content -Path $retryTrxPath -Raw -Encoding UTF8 + $nsUri = 'http://microsoft.com/schemas/VisualStudio/TeamTest/2010' + $nsMgr = New-Object System.Xml.XmlNamespaceManager($origXml.NameTable) + $nsMgr.AddNamespace('t', $nsUri) + $retryNsMgr = New-Object System.Xml.XmlNamespaceManager($retryXml.NameTable) + $retryNsMgr.AddNamespace('t', $nsUri) + + # Build a lookup of retry results by testName + $retryByName = @{} + foreach ($rr in $retryXml.SelectNodes('//t:UnitTestResult', $retryNsMgr)) { + $retryByName[$rr.GetAttribute('testName')] = $rr + } + + # Only replace entries that were in the original failed set. + # The retry filter uses substring matching (~) so the retry TRX + # may contain tests that passed on the first run (e.g. other + # parameterizations of the same method). We must NOT overwrite + # those — only replace originally-failed entries. 
+ $failedNameSet = New-Object 'System.Collections.Generic.HashSet[string]' + foreach ($fn in $failedNames) { [void]$failedNameSet.Add($fn) } + + foreach ($origResult in $origXml.SelectNodes('//t:UnitTestResult', $nsMgr)) { + $tName = $origResult.GetAttribute('testName') + if ($failedNameSet.Contains($tName) -and $retryByName.ContainsKey($tName)) { + $imported = $origXml.ImportNode($retryByName[$tName], $true) + $origResult.ParentNode.ReplaceChild($imported, $origResult) | Out-Null + } + } + + # Update counters to reflect merged results. Count outcomes + # using the same logic as Get-TrxResults: Passed stays Passed, + # NotExecuted/Inconclusive are Skipped, everything else is Failed. + $allResults = $origXml.SelectNodes('//t:UnitTestResult', $nsMgr) + $mergedTotal = $allResults.Count + $mergedPassed = @($allResults | Where-Object { $_.GetAttribute('outcome') -eq 'Passed' }).Count + $skippedOutcomes = @('NotExecuted', 'Inconclusive') + $mergedSkipped = @($allResults | Where-Object { $_.GetAttribute('outcome') -in $skippedOutcomes }).Count + $mergedFailed = $mergedTotal - $mergedPassed - $mergedSkipped + $mergedExecuted = $mergedPassed + $mergedFailed + $counters = $origXml.SelectSingleNode('//t:ResultSummary/t:Counters', $nsMgr) + if ($counters) { + $counters.SetAttribute('total', $mergedTotal) + $counters.SetAttribute('executed', $mergedExecuted) + $counters.SetAttribute('passed', $mergedPassed) + $counters.SetAttribute('failed', $mergedFailed) + } + + $origXml.Save($trxFilePath) + Write-Info "Merged retry results into original TRX ($mergedTotal total, $mergedPassed passed, $mergedFailed failed)" + } catch { + Write-Warn "Failed to merge TRX — falling back to retry-only TRX: $_" + Copy-Item $retryTrxPath $trxFilePath -Force + } + # Remove the retry TRX to prevent double-counting by downstream aggregators + Remove-Item $retryTrxPath -Force -ErrorAction SilentlyContinue + } + } + } + } + Write-Host "" Write-Info "Test output saved to: $testOutputFile" @@ -491,7 +672,7 
@@ Write-Host @" ╠═══════════════════════════════════════════════════════════╣ ║ Platform: $($Platform.ToUpper().PadRight(10)) ║ ║ Device: $($DeviceUdid.Substring(0, [Math]::Min(40, $DeviceUdid.Length)).PadRight(40)) ║ -║ Test Filter: $($effectiveFilter.Substring(0, [Math]::Min(40, $effectiveFilter.Length)).PadRight(40)) ║ +║ Test Filter: $($(if ($effectiveFilter) { $effectiveFilter.Substring(0, [Math]::Min(40, $effectiveFilter.Length)) } else { '(all tests)' }).PadRight(40)) ║ ║ Result: SUCCESS ✅ ║ ║ Logs: $HostAppLogsDir ╚═══════════════════════════════════════════════════════════╝ diff --git a/.github/scripts/Find-RegressionRisks.ps1 b/.github/scripts/Find-RegressionRisks.ps1 new file mode 100644 index 000000000000..eae088686e68 --- /dev/null +++ b/.github/scripts/Find-RegressionRisks.ps1 @@ -0,0 +1,827 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Detects regression risks by cross-referencing a PR's deletions against lines added by recent bug-fix PRs. + +.DESCRIPTION + Purely mechanical (no AI / LLM). For each implementation file in the PR diff: + 1. Collects lines REMOVED by the PR being reviewed. + 2. Uses `git log` to find PRs that touched the same file in the last N months. + 3. Filters those to bug-fix PRs (label match: i/regression, t/bug, p/0, p/1; or + linked-issue label match). + 4. Pulls each fix PR's diff and collects lines it ADDED to that same file. + 5. Compares (whitespace-insensitive). If a removed line equals a line a fix PR + added → 🔴 REVERT. Same file but no line match → 🟡 OVERLAP. Otherwise → 🟢 CLEAN. + + Outputs (when -OutputDir is provided): + - content.md Markdown summary suitable for the wall-of-text PR comment. + - risks.json Structured findings for downstream agents. + - result.txt One token: CLEAN | OVERLAP | REVERT (used by Review-PR.ps1 + for branching). + - inline-findings.json (only when -WriteInlineFindings is set and reverts found) + +.PARAMETER PRNumber + The PR number being analyzed. 
+ +.PARAMETER Repo + Repository in `owner/name` form. Defaults to dotnet/maui. + +.PARAMETER FilePaths + Optional list of files to analyze. If omitted, auto-detected from `gh pr diff`. + +.PARAMETER MonthsBack + How many months of history to scan for fix PRs. Default 6. + +.PARAMETER MaxRecentPRsPerFile + Cap on how many recent PRs to inspect per file (rate-limit guard). Default 20. + +.PARAMETER OutputDir + Directory to write content.md, risks.json, result.txt. If omitted, only console output. + +.PARAMETER WriteInlineFindings + When set, append entries to inline-findings.json at the file:line where reverted code + was deleted. Off by default until accuracy is validated. + +.EXAMPLE + pwsh .github/scripts/Find-RegressionRisks.ps1 -PRNumber 33908 + +.EXAMPLE + pwsh .github/scripts/Find-RegressionRisks.ps1 -PRNumber 33908 ` + -OutputDir "CustomAgentLogsTmp/PRState/33908/PRAgent/regression-check" +#> + +[CmdletBinding()] +param( + [Parameter(Mandatory = $true)] + [int]$PRNumber, + + [Parameter(Mandatory = $false)] + [string]$Repo = "dotnet/maui", + + [Parameter(Mandatory = $false)] + [string[]]$FilePaths, + + [Parameter(Mandatory = $false)] + [int]$MonthsBack = 6, + + [Parameter(Mandatory = $false)] + [int]$MaxRecentPRsPerFile = 20, + + [Parameter(Mandatory = $false)] + [string]$BaseBranch = 'main', + + [Parameter(Mandatory = $false)] + [string]$OutputDir, + + [Parameter(Mandatory = $false)] + [switch]$WriteInlineFindings +) + +$ErrorActionPreference = 'Continue' + +# ─── Helpers ────────────────────────────────────────────────────────────────── + +function Write-Banner { + param([string]$Title) + Write-Host "" + Write-Host "═══════════════════════════════════════════════════════════" -ForegroundColor Cyan + Write-Host " $Title" -ForegroundColor Cyan + Write-Host "═══════════════════════════════════════════════════════════" -ForegroundColor Cyan +} + +function ConvertTo-NormalizedLine { + # Whitespace-insensitive comparison key. 
Collapses runs of whitespace to a single space + # so an indent change alone won't trigger a false REVERT. + param([string]$Line) + return ($Line -replace '\s+', ' ').Trim() +} + +function Test-IsImplementationFile { + param([string]$Path) + if ($Path -notmatch '\.(cs|xaml)$') { return $false } + if ($Path -match '(?i)(Tests|TestCases|tests|snapshots|samples)/') { return $false } + if ($Path -match '\.Designer\.cs$') { return $false } + if ($Path -match '\.g\.cs$') { return $false } + return $true +} + +function Test-IsTestFile { + param([string]$Path) + if ($Path -notmatch '\.cs$') { return $false } + if ($Path -match '(?i)(Tests|TestCases)/') { return $true } + return $false +} + +function Get-PRDiffText { + param( + [int]$Number, + [string]$Repo + ) + $raw = gh pr diff $Number --repo $Repo 2>$null + if (-not $raw) { return $null } + if ($raw -is [array]) { $raw = $raw -join "`n" } + return $raw +} + +function Get-DiffLinesByFile { + <# + Parses a unified diff. Returns a hashtable: + { filePath -> [PSCustomObject]@{ Sign = '+' | '-'; Text = '...'; Line = } } + Line numbers are tracked from hunk headers so we can post inline findings. + #> + param( + [string]$DiffText + ) + $byFile = @{} + $currentFile = $null + $newLineCursor = 0 + $oldLineCursor = 0 + + foreach ($rawLine in ($DiffText -split "`n")) { + # Strip trailing CR (Windows-style line endings can survive in diff output) + $line = $rawLine.TrimEnd("`r") + + if ($line -match '^diff --git a/(.*) b/(.*)$') { + $currentFile = $Matches[2] + if (-not $byFile.ContainsKey($currentFile)) { + $byFile[$currentFile] = [System.Collections.Generic.List[object]]::new() + } + continue + } + if (-not $currentFile) { continue } + + if ($line -match '^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? 
@@') { + $oldLineCursor = [int]$Matches[1] + $newLineCursor = [int]$Matches[2] + continue + } + + # Skip diff metadata lines + if ($line -match '^(---|\+\+\+|index |new file|deleted file|similarity|rename|Binary)') { continue } + + # "\ No newline at end of file" marker — explicitly skip without advancing cursors + if ($line -match '^\\ No newline at end of file') { continue } + + if ($line.Length -eq 0) { + # Empty diff line outside a hunk — ignore (cursors only matter inside hunks) + continue + } + + $sign = $line.Substring(0, 1) + $text = if ($line.Length -gt 1) { $line.Substring(1) } else { '' } + + switch ($sign) { + '+' { + $byFile[$currentFile].Add([PSCustomObject]@{ + Sign = '+'; Text = $text; Line = $newLineCursor + }) + $newLineCursor++ + } + '-' { + $byFile[$currentFile].Add([PSCustomObject]@{ + Sign = '-'; Text = $text; Line = $oldLineCursor + }) + $oldLineCursor++ + } + ' ' { + $oldLineCursor++ + $newLineCursor++ + } + default { + # Unknown line — don't advance cursors + } + } + } + return $byFile +} + +function Test-IsTrivialLine { + # Filters out lines that produce meaningless matches (control-flow keywords alone, + # punctuation, single-token braces). A line must contain a substantive identifier + # or expression to be a useful match key. + param([string]$NormalizedText) + + if ([string]::IsNullOrWhiteSpace($NormalizedText)) { return $true } + if ($NormalizedText.Length -le 4) { return $true } + + # Punctuation/brace-only lines + if ($NormalizedText -match '^[\s\{\}\(\)\[\];,:]+$') { return $true } + + # Pure control-flow / scope keywords with optional terminator + if ($NormalizedText -match '^(return|break|continue|throw|else|try|finally|do|true|false|null);?\s*$') { return $true } + + # `using xyz;` and `namespace xyz` are very common — not interesting unless they + # appear next to surrounding context which we don't compare here. Skip. 
+ if ($NormalizedText -match '^(using|namespace)\s+[\w\.]+;?\s*$') { return $true } + + # Comment-only lines + if ($NormalizedText -match '^(//|/\*|\*|#)') { return $true } + + return $false +} + +function Test-IsBugFixLabel { + param([string]$Label) + # Only definitive bug-fix labels. p/0 and p/1 are priority labels that also + # apply to enhancements — they're used as secondary signal in Get-PRMetadataIfBugFix + # (AND-ed with linked-issue bug labels) but not as standalone classifiers. + return $Label -match '^(i/regression|t/bug)$' +} + +function Get-LinkedIssueNumbers { + param([string]$PRBody) + if (-not $PRBody) { return @() } + if ($PRBody -is [array]) { $PRBody = $PRBody -join "`n" } + $normalized = $PRBody -replace "`r`n", "`n" + $set = New-Object 'System.Collections.Generic.HashSet[int]' + + $patterns = @( + '(?i)(?:Fix(?:es|ed)?|Close[sd]?|Resolve[sd]?)\s+(?:https://github\.com/dotnet/maui/issues/)?#?(\d+)', + '(?m)^\s*-\s+#(\d+)\s*$', + '(?m)^\s*-\s+https://github\.com/dotnet/maui/issues/(\d+)\s*$' + ) + foreach ($pat in $patterns) { + foreach ($m in [regex]::Matches($normalized, $pat)) { + [void]$set.Add([int]$m.Groups[1].Value) + } + } + return @($set) +} + +function Get-PRMetadataIfBugFix { + param([int]$Number, [string]$Repo) + + # Single gh call for labels + title + body + merge commit (was 3 separate calls before). 
+ $json = gh pr view $Number --repo $Repo --json labels,title,body,mergeCommit 2>$null + if (-not $json) { return $null } + if ($json -is [array]) { $json = $json -join "`n" } + + try { + $data = $json | ConvertFrom-Json + } catch { + return $null + } + + $labelNames = @() + if ($data.labels) { + $labelNames = @($data.labels | ForEach-Object { $_.name } | Where-Object { $_ }) + } + + $matched = @($labelNames | Where-Object { Test-IsBugFixLabel $_ }) + $title = if ($data.title) { $data.title } else { '(unknown)' } + $linkedIssues = Get-LinkedIssueNumbers $data.body + + # Secondary signal: high-priority labels (p/0, p/1) combined with + # linked-issue bug labels suggest a bug-fix even when the PR itself + # lacks t/bug or i/regression. + $hasPriorityLabel = @($labelNames | Where-Object { $_ -match '^(p/0|p/1)$' }).Count -gt 0 + + # Fall back to linked-issue labels (the PR itself may not be labeled even though + # it fixes a bug — common for fork PRs where labels weren't applied at merge). + if ($matched.Count -eq 0 -and $linkedIssues.Count -gt 0) { + foreach ($issueNum in $linkedIssues) { + $issueLabelsRaw = gh issue view $issueNum --repo $Repo --json labels --jq '.labels[].name' 2>$null + if (-not $issueLabelsRaw) { continue } + foreach ($il in ($issueLabelsRaw -split "`n")) { + if (Test-IsBugFixLabel $il) { + $matched += "$il (from #$issueNum)" + } + } + } + } + + # p/0 and p/1 only count as bug-fix signals when combined with a + # definitive bug label from the PR or its linked issues. 
+ if ($matched.Count -gt 0 -and $hasPriorityLabel) { + $matched += @($labelNames | Where-Object { $_ -match '^(p/0|p/1)$' }) + } + + if ($matched.Count -eq 0) { return $null } + + $mergeOid = $null + if ($data.mergeCommit -and $data.mergeCommit.oid) { + $mergeOid = $data.mergeCommit.oid + } + + return [PSCustomObject]@{ + Number = $Number + Title = $title + Labels = $matched + LinkedIssues = $linkedIssues + MergeCommit = $mergeOid + } +} + +# ─── Main ───────────────────────────────────────────────────────────────────── + +# Validate gh authentication before making any API calls. +# Silent auth failures would cause every PR lookup to return empty, +# producing a false CLEAN result for risky PRs. +$authCheck = gh auth status 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "❌ GitHub CLI not authenticated. Cannot reliably analyze regression risks." -ForegroundColor Red + Write-Host " Run 'gh auth login' or set GH_TOKEN. Auth output:" -ForegroundColor Red + Write-Host " $authCheck" -ForegroundColor Gray + exit 2 +} + +Write-Banner "Regression Cross-Reference — PR #$PRNumber" + +# Resolve files +if (-not $FilePaths -or $FilePaths.Count -eq 0) { + Write-Host "📂 Auto-detecting implementation files from PR #$PRNumber…" -ForegroundColor Yellow + $prFiles = gh pr diff $PRNumber --repo $Repo --name-only 2>$null + if (-not $prFiles) { + Write-Host "❌ Could not get PR diff. Make sure gh is authenticated." -ForegroundColor Red + exit 2 + } + $FilePaths = @($prFiles | Where-Object { Test-IsImplementationFile $_ }) + Write-Host " Found $($FilePaths.Count) implementation file(s)" -ForegroundColor Gray +} + +if ($FilePaths.Count -eq 0) { + Write-Host "🟢 No implementation files to check." -ForegroundColor Green + if ($OutputDir) { + New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + "🟢 No implementation files modified — skipping regression cross-reference." 
| + Set-Content (Join-Path $OutputDir "content.md") -Encoding UTF8 + '{ "pr_number": ' + $PRNumber + ', "result": "CLEAN", "risks": [] }' | + Set-Content (Join-Path $OutputDir "risks.json") -Encoding UTF8 + "CLEAN" | Set-Content (Join-Path $OutputDir "result.txt") -Encoding UTF8 + } + exit 0 +} + +# Step 1: PR diff (lines removed) +Write-Host "" +Write-Host "📝 Reading current PR diff…" -ForegroundColor Yellow +$prDiff = Get-PRDiffText -Number $PRNumber -Repo $Repo +if (-not $prDiff) { + Write-Host "❌ Empty PR diff." -ForegroundColor Red + exit 2 +} +$prDiffByFile = Get-DiffLinesByFile -DiffText $prDiff + +# Per-file: removed lines (non-trivial) AND added lines (for move-suppression). +$removedByFile = @{} +$addedNormByFile = @{} +foreach ($file in $prDiffByFile.Keys) { + $removed = @($prDiffByFile[$file] | Where-Object { + $_.Sign -eq '-' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) + }) + if ($removed.Count -gt 0) { + $removedByFile[$file] = $removed + } + + $added = $prDiffByFile[$file] | Where-Object { $_.Sign -eq '+' } | + ForEach-Object { ConvertTo-NormalizedLine $_.Text } + $addedSet = New-Object 'System.Collections.Generic.HashSet[string]' + foreach ($a in $added) { [void]$addedSet.Add($a) } + $addedNormByFile[$file] = $addedSet +} + +# Resolve the base ref for git log scope. Try local refs first; if neither exists, fall +# back to --all (with a warning) so the script still produces useful output. +$gitLogRef = $null +foreach ($candidate in @($BaseBranch, "origin/$BaseBranch", "upstream/$BaseBranch")) { + git rev-parse --verify --quiet $candidate 2>$null | Out-Null + if ($LASTEXITCODE -eq 0) { + $gitLogRef = $candidate + break + } +} +if (-not $gitLogRef) { + Write-Host " ⚠️ Base ref '$BaseBranch' not found locally — falling back to --all (may include unrelated history)." -ForegroundColor Yellow +} + +# Resolve the PR's base branch so we can verify that fix PRs were actually merged +# into it. 
A fix merged to inflight/current won't be reachable from main. +$prBaseRef = $null +$prBaseJson = gh pr view $PRNumber --repo $Repo --json baseRefName --jq '.baseRefName' 2>$null +if ($prBaseJson) { + foreach ($candidate in @($prBaseJson, "origin/$prBaseJson", "upstream/$prBaseJson")) { + git rev-parse --verify --quiet $candidate 2>$null | Out-Null + if ($LASTEXITCODE -eq 0) { + $prBaseRef = $candidate + break + } + } +} +if ($prBaseRef) { + Write-Host " 📌 PR targets '$prBaseJson' — verifying fix PRs are reachable from $prBaseRef" -ForegroundColor Gray +} else { + Write-Host " ⚠️ Could not resolve PR base branch — skipping ancestry verification" -ForegroundColor Yellow +} + +# Steps 2-5: per file +$risks = New-Object System.Collections.Generic.List[object] +$inspectedPRs = @{} +$fixDiffCache = @{} +$ghCallCount = 0 + +foreach ($filePath in $FilePaths) { + Write-Host "" + Write-Host "🔍 $filePath" -ForegroundColor Cyan + + # Step 2: recent PRs touching this file + $sinceDate = (Get-Date).AddMonths(-$MonthsBack).ToString("yyyy-MM-dd") + if ($gitLogRef) { + # `--follow` traces through renames so we don't lose history when a file moves. + # `--follow` is single-file only, which matches our per-file loop. + $commitLog = git log --oneline --follow --since="$sinceDate" $gitLogRef -- $filePath 2>$null + } else { + $commitLog = git log --oneline --follow --since="$sinceDate" --all -- $filePath 2>$null + } + if (-not $commitLog) { + Write-Host " 🟢 No recent commits." -ForegroundColor Green + continue + } + + $recentPRs = New-Object 'System.Collections.Generic.List[int]' + $seen = New-Object 'System.Collections.Generic.HashSet[int]' + foreach ($line in ($commitLog -split "`n")) { + if ($line -match '\(#(\d+)\)') { + $n = [int]$Matches[1] + if ($n -ne $PRNumber -and $seen.Add($n)) { + $recentPRs.Add($n) + if ($recentPRs.Count -ge $MaxRecentPRsPerFile) { break } + } + } + } + + if ($recentPRs.Count -eq 0) { + Write-Host " 🟢 No recent PRs reference this file." 
-ForegroundColor Green + continue + } + + Write-Host " Found $($recentPRs.Count) recent PR(s)" -ForegroundColor Gray + + # Step 3: filter to bug-fix PRs + foreach ($recentPR in $recentPRs) { + Write-Host " 📋 #$recentPR…" -ForegroundColor Gray -NoNewline + + if ($inspectedPRs.ContainsKey($recentPR)) { + $meta = $inspectedPRs[$recentPR] + } else { + $meta = Get-PRMetadataIfBugFix -Number $recentPR -Repo $Repo + $inspectedPRs[$recentPR] = $meta + # Single combined `gh pr view --json labels,title,body` + up to one `gh issue + # view` per linked issue. Average ≈ 1-3 calls per fix-PR candidate. + $ghCallCount += 1 + ($(if ($meta -and $meta.LinkedIssues) { @($meta.LinkedIssues).Count } else { 0 })) + if ($ghCallCount -gt 100) { + Write-Host " (rate-limit guard: $ghCallCount gh calls so far)" -ForegroundColor DarkYellow + } + } + if (-not $meta) { + Write-Host " not a bug-fix" -ForegroundColor DarkGray + continue + } + Write-Host " bug-fix [$($meta.Labels -join ', ')]" -ForegroundColor Yellow + + # Verify fix PR was actually merged into the PR's base branch. A fix merged + # to inflight/current (or another branch) won't be in a PR targeting main. + if ($prBaseRef -and $meta.MergeCommit) { + git merge-base --is-ancestor $meta.MergeCommit $prBaseRef 2>$null + if ($LASTEXITCODE -ne 0) { + Write-Host " ⏭️ fix not in PR's base branch (merged to different branch)" -ForegroundColor DarkGray + continue + } + } + + # Step 4: parsed fix-PR diff (cache the *parsed* output, not just raw text). + if ($fixDiffCache.ContainsKey($recentPR)) { + $fixByFile = $fixDiffCache[$recentPR] + } else { + $fixDiff = Get-PRDiffText -Number $recentPR -Repo $Repo + $ghCallCount++ + $fixByFile = if ($fixDiff) { Get-DiffLinesByFile -DiffText $fixDiff } else { @{} } + $fixDiffCache[$recentPR] = $fixByFile + } + if ($fixByFile.Count -eq 0) { + # Fix PR diff unavailable — record only if we actually deleted something here. 
+ if ($removedByFile.ContainsKey($filePath)) { + $risks.Add([PSCustomObject]@{ + File = $filePath + RecentPR = $recentPR + PRTitle = $meta.Title + FixedIssues = ($meta.LinkedIssues | ForEach-Object { "#$_" }) -join ', ' + Labels = $meta.Labels -join ', ' + Risk = 'OVERLAP' + Details = 'Fix PR diff unavailable' + RevertedLines = @() + }) + } + continue + } + + if (-not $fixByFile.ContainsKey($filePath)) { + continue + } + + $addedByFix = @($fixByFile[$filePath] | + Where-Object { $_.Sign -eq '+' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) } | + ForEach-Object { ConvertTo-NormalizedLine $_.Text }) | Select-Object -Unique + if ($addedByFix.Count -eq 0) { continue } + + $removedHere = $removedByFile[$filePath] + # OVERLAP only matters when the current PR actually deleted something from this + # file. Otherwise, "same file, different lines" isn't regression evidence. + if (-not $removedHere) { + continue + } + + # Step 5: compare. Suppress matches the current PR also re-added (move/refactor). + $addedSet = New-Object 'System.Collections.Generic.HashSet[string]' + foreach ($n in $addedByFix) { [void]$addedSet.Add($n) } + $currentAddedSet = $addedNormByFile[$filePath] + + $reverted = New-Object System.Collections.Generic.List[object] + $seenLines = New-Object 'System.Collections.Generic.HashSet[string]' + foreach ($r in $removedHere) { + $key = ConvertTo-NormalizedLine $r.Text + if (-not $addedSet.Contains($key)) { continue } + if ($currentAddedSet -and $currentAddedSet.Contains($key)) { continue } # moved within PR + if (-not $seenLines.Add($key)) { continue } # dedup repeats + $reverted.Add([PSCustomObject]@{ Text = $r.Text; Line = $r.Line }) + } + + # Pre-compute values outside [PSCustomObject]@{} to avoid PowerShell evaluation + # context issues (observed "Argument types do not match" when $reverted.Count is + # evaluated inside a hashtable literal passed to List[object].Add()). 
+ $issueLinks = ($meta.LinkedIssues | ForEach-Object { "#$_" }) -join ', ' + $labelJoined = $meta.Labels -join ', ' + $revertCount = $reverted.Count + $revertedArr = $reverted.ToArray() + + if ($revertCount -gt 0) { + Write-Host " 🔴 REVERT — $revertCount line(s) from #$recentPR being removed" -ForegroundColor Red + foreach ($rl in $reverted) { Write-Host " - $($rl.Text.Trim())" -ForegroundColor Red } + $riskEntry = [PSCustomObject]@{ + File = $filePath + RecentPR = $recentPR + PRTitle = $meta.Title + FixedIssues = $issueLinks + Labels = $labelJoined + Risk = 'REVERT' + Details = "Removes $revertCount line(s) added by fix PR #$recentPR" + RevertedLines = $revertedArr + } + $risks.Add($riskEntry) + } else { + $riskEntry = [PSCustomObject]@{ + File = $filePath + RecentPR = $recentPR + PRTitle = $meta.Title + FixedIssues = $issueLinks + Labels = $labelJoined + Risk = 'OVERLAP' + Details = 'Same file, different lines' + RevertedLines = @() + } + $risks.Add($riskEntry) + } + } +} + +# ─── Extract test files from fix PRs that triggered REVERT ───────────────────── +# For each REVERT, find test files the fix PR added/modified and classify them +# via Detect-TestsInDiff.ps1 (if available). This enables downstream test execution. 
+ +$detectTestsScript = Join-Path $PSScriptRoot "shared/Detect-TestsInDiff.ps1" +$hasTestDetector = Test-Path $detectTestsScript + +$fixPRsWithTests = @{} # fixPR -> array of test metadata + +if ($hasTestDetector) { + # Extract tests for ALL risk entries (REVERT and OVERLAP) for maximum confidence + $allFixPRs = @($risks | Select-Object -ExpandProperty RecentPR -Unique) + + foreach ($fixPR in $allFixPRs) { + if ($fixPRsWithTests.ContainsKey($fixPR)) { continue } + + # Get all file paths from the fix PR diff (already cached) + $fixFiles = @() + if ($fixDiffCache.ContainsKey($fixPR)) { + $fixFiles = @($fixDiffCache[$fixPR].Keys | Where-Object { Test-IsTestFile $_ }) + } + + if ($fixFiles.Count -eq 0) { + Write-Host " [info] Fix PR #$fixPR`: no test files in diff" -ForegroundColor DarkGray + $fixPRsWithTests[$fixPR] = @() + continue + } + + Write-Host " 🧪 Fix PR #$fixPR`: detecting tests from $($fixFiles.Count) test file(s)…" -ForegroundColor Cyan + try { + $detected = & $detectTestsScript -ChangedFiles $fixFiles 2>&1 + # Filter out Write-Host output — only keep returned objects + $testEntries = @($detected | Where-Object { $_ -is [hashtable] -or ($_ -is [PSCustomObject]) }) + if ($testEntries.Count -gt 0) { + Write-Host " Found $($testEntries.Count) test(s)" -ForegroundColor Green + $fixPRsWithTests[$fixPR] = $testEntries + } else { + Write-Host " No classifiable tests found" -ForegroundColor DarkGray + $fixPRsWithTests[$fixPR] = @() + } + } catch { + Write-Host " ⚠️ Test detection failed: $_" -ForegroundColor Yellow + $fixPRsWithTests[$fixPR] = @() + } + } +} else { + Write-Host " ℹ️ Detect-TestsInDiff.ps1 not found — skipping test extraction" -ForegroundColor DarkGray +} + +# Attach test metadata to ALL risk entries (REVERT and OVERLAP) +foreach ($r in $risks) { + $r | Add-Member -NotePropertyName TestsFromFixPR -NotePropertyValue @() -Force + if ($fixPRsWithTests.ContainsKey($r.RecentPR)) { + $r.TestsFromFixPR = $fixPRsWithTests[$r.RecentPR] + } +} + +Write-Banner 
"Results" + +$reverts = @($risks | Where-Object { $_.Risk -eq 'REVERT' }) +$overlaps = @($risks | Where-Object { $_.Risk -eq 'OVERLAP' }) +$result = if ($reverts.Count -gt 0) { 'REVERT' } + elseif ($overlaps.Count -gt 0) { 'OVERLAP' } + else { 'CLEAN' } + +switch ($result) { + 'REVERT' { + Write-Host "🔴 REVERT RISKS: $($reverts.Count)" -ForegroundColor Red + foreach ($r in $reverts) { + Write-Host "" + Write-Host " File: $($r.File)" -ForegroundColor Red + Write-Host " Fix PR: #$($r.RecentPR) — $($r.PRTitle)" -ForegroundColor Red + Write-Host " Fixed: $($r.FixedIssues)" -ForegroundColor Red + Write-Host " Reverted: $((@($r.RevertedLines) | Select-Object -First 3 | ForEach-Object { $_.Text.Trim() }) -join ' | ')" -ForegroundColor Red + } + $allIssues = @($reverts | ForEach-Object { $_.FixedIssues -split ',\s*' } | + Where-Object { $_ } | Select-Object -Unique | Sort-Object) + if ($allIssues.Count -gt 0) { + Write-Host "" + Write-Host "⚠️ Verify that issues $($allIssues -join ', ') do not re-regress." -ForegroundColor Yellow + } + } + 'OVERLAP' { + Write-Host "🟡 OVERLAPS: $($overlaps.Count) (lower risk — same files, different lines)" -ForegroundColor Yellow + foreach ($o in $overlaps) { + Write-Host " $($o.File) — fix PR #$($o.RecentPR) ($($o.FixedIssues))" -ForegroundColor Yellow + } + } + 'CLEAN' { + Write-Host "🟢 No regression risks detected." 
-ForegroundColor Green + } +} + +Write-Host "" +Write-Host "(gh API calls: $ghCallCount; PRs inspected: $($inspectedPRs.Count))" -ForegroundColor DarkGray + +# ─── Output files ───────────────────────────────────────────────────────────── + +if ($OutputDir) { + New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + + # result.txt + $result | Set-Content (Join-Path $OutputDir 'result.txt') -Encoding UTF8 + + # risks.json — structured output for agent consumption + $jsonRisks = @($risks | ForEach-Object { + $entry = @{ + file = $_.File + recent_pr = $_.RecentPR + pr_title = $_.PRTitle + fixed_issues = $_.FixedIssues + labels = $_.Labels + risk = $_.Risk + details = $_.Details + reverted_lines = @(@($_.RevertedLines) | ForEach-Object { @{ text = $_.Text; line = $_.Line } }) + } + # Include test metadata for all risk entries (REVERT and OVERLAP) + if ($_.TestsFromFixPR -and $_.TestsFromFixPR.Count -gt 0) { + $entry['regression_tests'] = @($_.TestsFromFixPR | ForEach-Object { + @{ + type = $_.Type + test_name = $_.TestName + filter = $_.Filter + project_path = $_.ProjectPath + project = $_.Project + runner = $_.Runner + files = @($_.Files) + } + }) + } else { + $entry['regression_tests'] = @() + } + $entry + }) + $payload = @{ + pr_number = $PRNumber + result = $result + revert_count = $reverts.Count + overlap_count= $overlaps.Count + risks = $jsonRisks + } | ConvertTo-Json -Depth 6 + $payload | Set-Content (Join-Path $OutputDir 'risks.json') -Encoding UTF8 + + # content.md — markdown summary for the wall-of-text PR comment + $md = New-Object System.Text.StringBuilder + [void]$md.AppendLine("## 🔍 Regression Cross-Reference") + [void]$md.AppendLine() + switch ($result) { + 'REVERT' { + [void]$md.AppendLine("🔴 **Revert risks detected** — this PR removes $($reverts.Count) line(s) previously added by labeled bug-fix PRs.") + [void]$md.AppendLine() + [void]$md.AppendLine("| File | Fix PR | Fixed issue(s) | Risk | Reverted line |") + 
[void]$md.AppendLine("|---|---|---|---|---|") + foreach ($r in $reverts) { + $sample = @($r.RevertedLines) | Select-Object -First 1 | ForEach-Object { $_.Text.Trim() } + $sampleEsc = ($sample -replace '\|', '\|') + [void]$md.AppendLine("| ``$($r.File)`` | #$($r.RecentPR) | $($r.FixedIssues) | 🔴 REVERT | ``$sampleEsc`` |") + } + $allIssues = @($reverts | ForEach-Object { $_.FixedIssues -split ',\s*' } | + Where-Object { $_ } | Select-Object -Unique | Sort-Object) + if ($allIssues.Count -gt 0) { + [void]$md.AppendLine() + [void]$md.AppendLine("**Action required:** Verify that issues $($allIssues -join ', ') do not re-regress before merging.") + } + + # List regression tests that should be run + $allRegressionTests = @($reverts | Where-Object { $_.TestsFromFixPR.Count -gt 0 } | + ForEach-Object { $pr = $_.RecentPR; $_.TestsFromFixPR | ForEach-Object { + [PSCustomObject]@{ FixPR = $pr; Type = $_.Type; TestName = $_.TestName; Filter = $_.Filter; Runner = $_.Runner } + }}) + if ($allRegressionTests.Count -gt 0) { + [void]$md.AppendLine() + [void]$md.AppendLine("### 🧪 Regression Tests to Verify") + [void]$md.AppendLine() + [void]$md.AppendLine("These tests were added by the fix PRs being reverted. 
They must still pass:") + [void]$md.AppendLine() + [void]$md.AppendLine("| Fix PR | Type | Test | Filter |") + [void]$md.AppendLine("|---|---|---|---|") + foreach ($t in $allRegressionTests) { + [void]$md.AppendLine("| #$($t.FixPR) | $($t.Type) | $($t.TestName) | ``$($t.Filter)`` |") + } + } + } + 'OVERLAP' { + [void]$md.AppendLine("🟡 **Overlaps with prior bug-fix PRs** — same files modified, but no exact line revert detected.") + [void]$md.AppendLine() + [void]$md.AppendLine("| File | Fix PR | Fixed issue(s) |") + [void]$md.AppendLine("|---|---|---|") + foreach ($o in $overlaps) { + [void]$md.AppendLine("| ``$($o.File)`` | #$($o.RecentPR) | $($o.FixedIssues) |") + } + + # List regression tests from overlapping fix PRs + $overlapTests = @($overlaps | Where-Object { $_.TestsFromFixPR.Count -gt 0 } | + ForEach-Object { $pr = $_.RecentPR; $_.TestsFromFixPR | ForEach-Object { + [PSCustomObject]@{ FixPR = $pr; Type = $_.Type; TestName = $_.TestName; Filter = $_.Filter; Runner = $_.Runner } + }}) + if ($overlapTests.Count -gt 0) { + [void]$md.AppendLine() + [void]$md.AppendLine("### 🧪 Regression Tests to Verify") + [void]$md.AppendLine() + [void]$md.AppendLine("These tests were added by the overlapping fix PRs. Running them to verify no side-effect regressions:") + [void]$md.AppendLine() + [void]$md.AppendLine("| Fix PR | Type | Test | Filter |") + [void]$md.AppendLine("|---|---|---|---|") + foreach ($t in $overlapTests) { + [void]$md.AppendLine("| #$($t.FixPR) | $($t.Type) | $($t.TestName) | ``$($t.Filter)`` |") + } + } + } + 'CLEAN' { + [void]$md.AppendLine("🟢 No regression risks detected. 
No labeled bug-fix PRs in the last $MonthsBack months touched the modified files.") + } + } + $md.ToString() | Set-Content (Join-Path $OutputDir 'content.md') -Encoding UTF8 + + # inline-findings.json — optional, only if reverts found + if ($WriteInlineFindings -and $reverts.Count -gt 0) { + $inlinePath = Join-Path $OutputDir 'inline-findings.json' + $inline = @() + foreach ($r in $reverts) { + foreach ($rl in @($r.RevertedLines)) { + $prUrl = "https://github.com/$Repo/pull/$($r.RecentPR)" + $body = "🔴 **Regression risk** — this line was added by [#$($r.RecentPR)]($prUrl) to fix $($r.FixedIssues). Removing it may re-introduce the original bug. Please confirm this removal is intentional and that the previously-fixed issue is covered by another mechanism." + $inline += @{ + path = $r.File + line = $rl.Line + body = $body + side = 'LEFT' + } + } + } + ($inline | ConvertTo-Json -Depth 4) | Set-Content $inlinePath -Encoding UTF8 + Write-Host "" + Write-Host "📝 Wrote $($inline.Count) inline finding(s) to $inlinePath" -ForegroundColor DarkGray + } + + Write-Host "" + Write-Host "📁 Outputs written to: $OutputDir" -ForegroundColor DarkGray +} + +exit 0 diff --git a/.github/scripts/Review-PR.Tests.ps1 b/.github/scripts/Review-PR.Tests.ps1 new file mode 100644 index 000000000000..f3674a0af24a --- /dev/null +++ b/.github/scripts/Review-PR.Tests.ps1 @@ -0,0 +1,237 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester +<# +.SYNOPSIS + Pester tests for pure-function helpers in Review-PR.ps1. + Currently covers: + - Get-TrxResults (parses VSTest TRX produced by `dotnet test --logger trx`) + - Get-DotNetTestResults (legacy console-output scraper, still used as fallback + when TRX is missing) + + These functions sit on the critical path of STEP 3 (UI Test Execution + Results in the AI summary comment). A regression here can silently + misrender per-test counts (e.g. "1/1 (1 ❌)" instead of "75/619 (544 ❌)") + so they're worth pinning with focused tests. 
+ +.EXAMPLE + Invoke-Pester ./Review-PR.Tests.ps1 + Invoke-Pester ./Review-PR.Tests.ps1 -Output Detailed +#> + +BeforeAll { + # Source just the helper functions we want to test out of Review-PR.ps1. + # We can't dot-source the entire script because it has top-level imperative + # logic (banner, prerequisites, step driver) that runs at parse time. + $reviewScript = Join-Path $PSScriptRoot 'Review-PR.ps1' + $content = Get-Content -Raw $reviewScript + + function Get-FunctionBody { + param([string]$ScriptText, [string]$FunctionName) + $start = $ScriptText.IndexOf("function $FunctionName") + if ($start -lt 0) { throw "Function '$FunctionName' not found" } + $i = $ScriptText.IndexOf('{', $start) + $depth = 0; $end = -1 + for (; $i -lt $ScriptText.Length; $i++) { + $c = $ScriptText[$i] + if ($c -eq '{') { $depth++ } + elseif ($c -eq '}') { $depth--; if ($depth -eq 0) { $end = $i; break } } + } + return $ScriptText.Substring($start, $end - $start + 1) + } + + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-TrxResults') + Invoke-Expression (Get-FunctionBody -ScriptText $content -FunctionName 'Get-DotNetTestResults') +} + +Describe 'Get-TrxResults' { + BeforeAll { + $script:fixtureDir = Join-Path ([System.IO.Path]::GetTempPath()) "trx-fixtures-$(New-Guid)" + New-Item -ItemType Directory -Path $script:fixtureDir -Force | Out-Null + } + + AfterAll { + Remove-Item -Path $script:fixtureDir -Recurse -Force -ErrorAction SilentlyContinue + } + + It 'returns null for a missing file' { + $r = Get-TrxResults -TrxPath '/does/not/exist.trx' + $r | Should -BeNullOrEmpty + } + + It 'returns null for an empty path' { + Get-TrxResults -TrxPath '' | Should -BeNullOrEmpty + Get-TrxResults -TrxPath $null | Should -BeNullOrEmpty + } + + It 'parses aggregate counters from ResultSummary/Counters' { + $trx = Join-Path $script:fixtureDir 'aggregate.trx' + @' + + + + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + $r = Get-TrxResults -TrxPath $trx + $r.Total | 
Should -Be 619 + $r.Passed | Should -Be 75 + $r.Failed | Should -Be 544 + $r.Skipped | Should -Be 0 + } + + It 'computes Skipped as Total-Executed when not separately tracked' { + $trx = Join-Path $script:fixtureDir 'skipped.trx' + @' + + + + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + $r = Get-TrxResults -TrxPath $trx + $r.Total | Should -Be 100 + $r.Skipped | Should -Be 7 # 100 - 93 + } + + It 'parses individual UnitTestResult nodes into the Results list' { + $trx = Join-Path $script:fixtureDir 'individual.trx' + @' + + + + + + + + + + + Expected: True; Actual: False + at Bar() in F.cs:line 42 + + + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + $r = Get-TrxResults -TrxPath $trx + $r.Results.Count | Should -Be 3 + + $foo = $r.Results | Where-Object { $_.name -eq 'Foo' } + $foo.status | Should -Be 'Passed' + + $bar = $r.Results | Where-Object { $_.name -eq 'Bar' } + $bar.status | Should -Be 'Failed' + $bar.error | Should -Be 'Expected: True; Actual: False' + $bar.stack | Should -Be 'at Bar() in F.cs:line 42' + + $baz = $r.Results | Where-Object { $_.name -eq 'Baz' } + $baz.status | Should -Be 'Skipped' # NotExecuted normalized to Skipped + } + + It 'normalizes Inconclusive outcome to Skipped' { + $trx = Join-Path $script:fixtureDir 'inconclusive.trx' + @' + + + + + + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + (Get-TrxResults -TrxPath $trx).Results[0].status | Should -Be 'Skipped' + } + + It 'returns an empty Results array when there are no UnitTestResult nodes' { + $trx = Join-Path $script:fixtureDir 'empty.trx' + @' + + + + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + $r = Get-TrxResults -TrxPath $trx + $r.Results.Count | Should -Be 0 + $r.Total | Should -Be 0 + } + + It 'gracefully handles malformed XML (returns null, does not throw)' { + $trx = Join-Path $script:fixtureDir 'bad.trx' + ' + + + + +'@ | Set-Content -Path $trx -Encoding UTF8 + + (Get-TrxResults -TrxPath $trx).TrxPath | Should -Be $trx + } +} + +Describe 
'Get-DotNetTestResults (console-scrape fallback)' { + It 'parses a single Passed entry' { + $lines = @( + ' Passed Foo.Bar [12 ms]' + ) + $r = Get-DotNetTestResults -Lines $lines + $r.Count | Should -Be 1 + $r[0].status | Should -Be 'Passed' + $r[0].name | Should -Be 'Foo.Bar' + } + + It 'parses multiple consecutive results' { + $lines = @( + ' Passed One [1 ms]', + ' Passed Two [2 ms]', + ' Failed Three [3 ms]' + ) + $r = Get-DotNetTestResults -Lines $lines + $r.Count | Should -Be 3 + ($r | Where-Object { $_.status -eq 'Failed' }).name | Should -Be 'Three' + } + + It 'captures error message and stack between two results' { + $lines = @( + ' Passed Alpha [10 ms]', + ' Failed Beta [20 ms]', + ' Error Message:', + ' Expected: 1; Actual: 2', + ' Stack Trace:', + ' at Beta() in B.cs:line 99', + ' Passed Gamma [5 ms]' + ) + $r = Get-DotNetTestResults -Lines $lines + $beta = $r | Where-Object { $_.name -eq 'Beta' } + $beta.error | Should -Match 'Expected: 1; Actual: 2' + $beta.stack | Should -Match 'at Beta\(\) in B\.cs:line 99' + } + + It 'returns an empty array for empty input' { + (Get-DotNetTestResults -Lines @()).Count | Should -Be 0 + } +} diff --git a/.github/scripts/Review-PR.ps1 b/.github/scripts/Review-PR.ps1 index 50b7d622e6b9..3bce923e2ddc 100644 --- a/.github/scripts/Review-PR.ps1 +++ b/.github/scripts/Review-PR.ps1 @@ -5,12 +5,15 @@ .DESCRIPTION Orchestrates a PR review by invoking scripts and Copilot CLI: - Step 0: Branch setup - Create review branch from main, merge PR squashed - Step 1: Gate - Run test verification directly (verify-tests-fail.ps1) - Step 2: Multi-candidate review - Pre-Flight, then PARALLEL (expert-reviewer eval of PR + Try-Fix×4), - then Report compares all candidates and writes winner.json - Step 3: Post AI Summary - Directly runs posting scripts - Step 4: Apply labels - Apply agent labels based on review results + Step 1: Branch setup - Create review branch from main, merge PR squashed + Step 2: Detect UI categories - Run 
eng/scripts/detect-ui-test-categories.ps1 (info only) + Step 3: Run detected UI tests - Execute BuildAndRunHostApp.ps1 per detected category (informational) + Step 4: Regression cross-ref - Run Find-RegressionRisks.ps1 + run any tests from prior fix PRs + Step 5: Gate - Run test verification directly (verify-tests-fail.ps1) + Step 6: Multi-candidate review - Pre-Flight, then PARALLEL (expert-reviewer eval of PR + Try-Fix×4), + then Report compares all candidates and writes winner.json + Step 7: Post AI Summary - Directly runs posting scripts + Step 8: Apply labels - Apply agent labels based on review results By default, the script checks out main and creates a review branch from it. If squash-merge conflicts, the script posts a comment on the PR and exits. @@ -117,12 +120,12 @@ $autonomousRules = @" "@ # ═════════════════════════════════════════════════════════════════════════════ -# STEP 0: Branch Setup (Create Review Branch & Cherry-Pick PR) +# STEP 1: Branch Setup (Create Review Branch & Cherry-Pick PR) # ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Yellow -Write-Host "║ STEP 0: BRANCH SETUP ║" -ForegroundColor Yellow +Write-Host "║ STEP 1: BRANCH SETUP ║" -ForegroundColor Yellow Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Yellow $reviewBranch = "pr-review-$PRNumber" @@ -157,19 +160,30 @@ if ($DryRun) { git branch -D $reviewBranch 2>$null } - # Auto-detect CI environment — in CI, always use current branch + # Auto-detect CI environment $isCI = $env:CI -or $env:TF_BUILD -or $env:GITHUB_ACTIONS -or $env:BUILD_BUILDID - if ($isCI -and -not $UseCurrentBranch) { - Write-Host " 🤖 CI environment detected — using current branch instead of main" -ForegroundColor Cyan - $UseCurrentBranch = $true - } # Capture original branch so error paths can restore it (not `git checkout -` which is 
unreliable) $originalBranch = git branch --show-current 2>$null if (-not $originalBranch) { $originalBranch = git rev-parse HEAD 2>$null } - if (-not $UseCurrentBranch) { - # Default: checkout main first + if ($UseCurrentBranch) { + $currentBranch = git branch --show-current 2>$null + if (-not $currentBranch) { $currentBranch = "(detached HEAD)" } + Write-Host " 📌 Using current branch: $currentBranch" -ForegroundColor Cyan + } elseif ($isCI) { + # In CI the checkout is pinned to the pipeline branch (e.g. + # feature/regression-check via -b parameter). The pipeline ref + # already contains our script fixes — switching to origin/main + # would overwrite them. Stay on the current branch and squash-merge + # the PR onto it. This preserves all pipeline-ref scripts while + # still testing the PR's changes. + $currentBranch = git branch --show-current 2>$null + if (-not $currentBranch) { $currentBranch = git rev-parse --short HEAD 2>$null } + $baseSha = git rev-parse --short HEAD 2>$null + Write-Host " 🤖 CI environment detected — using pipeline branch '$currentBranch' as merge base ($baseSha)" -ForegroundColor Cyan + } else { + # Default (local): checkout main Write-Host " 📌 Checking out main branch..." 
-ForegroundColor Cyan git checkout main 2>&1 | Out-Null if ($LASTEXITCODE -ne 0) { Write-Error "Failed to checkout main"; exit 1 } @@ -179,10 +193,6 @@ if ($DryRun) { } $baseSha = git rev-parse --short HEAD 2>$null Write-Host " 📌 Review base: main @ $baseSha" -ForegroundColor Cyan - } else { - $currentBranch = git branch --show-current 2>$null - if (-not $currentBranch) { $currentBranch = "(detached HEAD)" } - Write-Host " 📌 Using current branch: $currentBranch" -ForegroundColor Cyan } # Create review branch @@ -265,6 +275,164 @@ if ($DryRun) { Write-Host " 📝 HEAD: $headCommit" -ForegroundColor Gray } +# ─── Helper: Parse `dotnet test --logger "console;verbosity=detailed"` ────── +# Extracts per-test results (Passed/Failed/Skipped) plus failure messages and +# stack traces from raw stdout. Used by STEP 3 so the AI summary comment shows +# WHICH tests failed and WHY, not just an aggregate exit code. +function Get-DotNetTestResults { + param([string[]]$Lines) + + $results = New-Object System.Collections.ArrayList + if (-not $Lines -or $Lines.Count -eq 0) { return ,@() } + $n = $Lines.Count + $i = 0 + # A test result line: " Passed/Failed/Skipped []" + $testRe = '^ (Passed|Failed|Skipped)\s+(.+?)\s+\[(.+?)\]\s*$' + while ($i -lt $n) { + $line = [string]$Lines[$i] + if ($line -match $testRe) { + $status = $Matches[1] + $name = $Matches[2].Trim() + $duration = $Matches[3].Trim() + + $err = New-Object System.Collections.Generic.List[string] + $stack = New-Object System.Collections.Generic.List[string] + $section = $null + $j = $i + 1 + while ($j -lt $n) { + $l = [string]$Lines[$j] + # Stop at the next test result. + if ($l -match $testRe) { break } + # Stop at runner / xharness section markers. 
+ $stripped = $l.Trim() + if ($stripped.StartsWith('>>>>>') -or + $stripped.StartsWith('NUnit Adapter') -or + $stripped.StartsWith('Test Run') -or + $stripped.StartsWith('Total tests:') -or + $stripped.StartsWith('Total time:') -or + $stripped.StartsWith('Test execution complete') -or + $stripped.StartsWith('Passed!') -or + $stripped.StartsWith('Failed!') -or + $stripped.StartsWith('Skipped!') -or + $stripped -match '^\[xUnit') { + break + } + if ($stripped.StartsWith('Error Message:')) { + $section = 'err' + $rest = $stripped.Substring('Error Message:'.Length).Trim() + if ($rest) { $err.Add($rest) | Out-Null } + } elseif ($stripped.StartsWith('Stack Trace:')) { + $section = 'stack' + $rest = $stripped.Substring('Stack Trace:'.Length).Trim() + if ($rest) { $stack.Add($rest) | Out-Null } + } elseif ($stripped.StartsWith('Standard Output Messages:') -or + $stripped.StartsWith('Attachments:')) { + $section = 'stdout' + } elseif ($section -eq 'err') { + $err.Add($l.TrimEnd()) | Out-Null + } elseif ($section -eq 'stack') { + $stack.Add($l.TrimEnd()) | Out-Null + } + $j++ + } + + $entry = [ordered]@{ + status = $status + name = $name + duration = $duration + error = (($err -join "`n").Trim()) + stack = (($stack -join "`n").Trim()) + } + [void]$results.Add($entry) + $i = [Math]::Max($j, $i + 1) + } else { + $i++ + } + } + # Force array semantics so callers see [object[]] even with 0 or 1 items. + return ,@($results.ToArray()) +} + +# ─── Helper: Parse VSTest TRX file (authoritative test results) ───────────── +# CI's `RunTestWithLocalDotNet` writes a TRX file via: +# --logger "trx;LogFileName=.trx" --results-directory +# The TRX is the same format AzDO's PublishTestResults@2 ingests, so it has +# every test's outcome, duration, error message and stack trace — without +# any console-scrape ambiguity. 
STEP 3 prefers TRX when available because +# parsing console output is fragile when many tests run, lines wrap, or +# multi-line ErrorRecords get glued together by PowerShell stream merging. +# Get-TrxResults: defined inline because Review-PR.ps1 is invoked by +# Copilot CLI in a way that breaks dot-sourcing ($PSScriptRoot empty). +# The canonical copy lives in shared/Get-TrxResults.ps1 for Stage 3. +function Get-TrxResults { + param([string]$TrxPath) + + if (-not $TrxPath -or -not (Test-Path $TrxPath)) { + return $null + } + + try { + [xml]$trx = Get-Content -Path $TrxPath -Raw -Encoding UTF8 + } catch { + Write-Host " ⚠️ Failed to parse TRX $TrxPath : $_" -ForegroundColor Yellow + return $null + } + + $ns = New-Object System.Xml.XmlNamespaceManager($trx.NameTable) + $ns.AddNamespace('t', 'http://microsoft.com/schemas/VisualStudio/TeamTest/2010') + + $countersNode = $trx.SelectSingleNode('//t:ResultSummary/t:Counters', $ns) + $total = 0; $passed = 0; $failed = 0; $skipped = 0 + if ($countersNode) { + $total = [int]($countersNode.GetAttribute('total')) + $passed = [int]($countersNode.GetAttribute('passed')) + $failed = [int]($countersNode.GetAttribute('failed')) + $executed = [int]($countersNode.GetAttribute('executed')) + $skipped = [Math]::Max(0, $total - $executed) + } + + $entries = New-Object System.Collections.ArrayList + $resultNodes = $trx.SelectNodes('//t:UnitTestResult', $ns) + foreach ($r in $resultNodes) { + $name = $r.GetAttribute('testName') + $outcomeAttr = $r.GetAttribute('outcome') + $status = switch ($outcomeAttr) { + 'Passed' { 'Passed' } + 'Failed' { 'Failed' } + 'NotExecuted' { 'Skipped' } + 'Inconclusive' { 'Skipped' } + # Map all other outcomes (Aborted, Timeout, Error, Disconnected, + # Warning, Pending) to Failed — matches shared/Get-TrxResults.ps1. 
+ default { 'Failed' } + } + $duration = $r.GetAttribute('duration') + $err = ''; $stack = '' + $errInfo = $r.SelectSingleNode('t:Output/t:ErrorInfo', $ns) + if ($errInfo) { + $msgNode = $errInfo.SelectSingleNode('t:Message', $ns) + $stackNode = $errInfo.SelectSingleNode('t:StackTrace', $ns) + if ($msgNode) { $err = $msgNode.InnerText.Trim() } + if ($stackNode) { $stack = $stackNode.InnerText.Trim() } + } + [void]$entries.Add([ordered]@{ + status = $status + name = $name + duration = $duration + error = $err + stack = $stack + }) + } + + return @{ + Total = $total + Passed = $passed + Failed = $failed + Skipped = $skipped + Results = @($entries.ToArray()) + TrxPath = $TrxPath + } +} + # ─── Helper: Invoke Copilot ────────────────────────────────────────────────── function Invoke-CopilotStep { param([string]$StepName, [string]$Prompt) @@ -301,7 +469,7 @@ function Invoke-CopilotStep { # Use JSON output format to stream live progress of agent activity. # Model is overridable via $env:COPILOT_REVIEW_MODEL so contributors without internal-model access # can run this script (e.g., with 'claude-opus-4.6' or 'claude-sonnet-4.6'). 
- $copilotModel = if ($env:COPILOT_REVIEW_MODEL) { $env:COPILOT_REVIEW_MODEL } else { 'claude-opus-4.7-1m-internal' } + $copilotModel = if ($env:COPILOT_REVIEW_MODEL) { $env:COPILOT_REVIEW_MODEL } else { 'gpt-5.5' } & copilot -p $Prompt --allow-all --output-format json --model $copilotModel 2>&1 | ForEach-Object { $line = $_.ToString() try { @@ -442,12 +610,12 @@ function Invoke-CopilotStep { } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 0.5: DETECT UI Test Categories (detection only — no pipeline trigger) +# STEP 2: DETECT UI Test Categories (detection only — no pipeline trigger) # ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan -Write-Host "║ STEP 0.5: DETECT UI TEST CATEGORIES ║" -ForegroundColor Cyan +Write-Host "║ STEP 2: DETECT UI TEST CATEGORIES ║" -ForegroundColor Cyan Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan $uitestCategories = "" @@ -477,6 +645,23 @@ if (Test-Path $detectScript) { Write-Host " 🎯 Detected categories: $uitestCategories" -ForegroundColor Green } + # Emit detected categories as an AzDO output variable so downstream + # stages (RunDeepUITests, UpdateAISummaryComment) in ci-copilot.yml + # can read them via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.detectedCategories']). + # `isOutput=true` is required for cross-stage consumption; the + # variable name is namespaced under the step's `name:` property + # in ci-copilot.yml (currently `RunReview`) by AzDO. + # Local invocations (no $env:TF_BUILD) won't have an AzDO variable + # store but the marker is harmless — gets ignored. + # Emit detected categories. Blank = "run all", a specific string = categories, + # NONE = no UI tests needed. Preserve blank as 'ALL' (not NONE) so Stage 2 + # can distinguish "run everything" from "run nothing". 
+ $catsForOutput = if ($uitestCategories -eq 'NONE') { 'NONE' } + elseif ([string]::IsNullOrWhiteSpace($uitestCategories)) { 'ALL' } + else { $uitestCategories } + Write-Host "##vso[task.setvariable variable=detectedCategories;isOutput=true]$catsForOutput" + Write-Host "##vso[task.setvariable variable=detectedPlatform;isOutput=true]$Platform" + # Write detection result for AI summary $uitestOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests" New-Item -ItemType Directory -Force -Path $uitestOutputDir | Out-Null @@ -497,20 +682,665 @@ if (Test-Path $detectScript) { # Belt-and-suspenders: the detect script's manual-PR mode does # `git checkout $headSha`, leaving HEAD detached. Its own try/finally restores # the previous ref, but if that finally is skipped (process killed, scripting -# error before the outer try opens) we'd run Step 1's gate against the wrong -# tree. Force HEAD back to the review branch and fail loudly if we can't. +# error before the outer try opens) we'd run subsequent steps against the +# wrong tree. Force HEAD back to the review branch and fail loudly if we can't. +git checkout $reviewBranch 2>$null | Out-Null +if ($LASTEXITCODE -ne 0) { + Write-Host " ⚠️ Failed to restore review branch '$reviewBranch' after Step 2 — subsequent steps may run against the wrong tree" -ForegroundColor Red +} + +# ═════════════════════════════════════════════════════════════════════════════ +# STEP 3: RUN DETECTED UI TEST CATEGORIES (script, no copilot agent) +# ═════════════════════════════════════════════════════════════════════════════ +# Runs the UI test categories that Step 2 detected. Skipped when: +# - $uitestCategories is 'NONE' (no UI-relevant changes) +# - $uitestCategories is empty/blank (run-all matrix — too expensive locally) +# Results are appended to the existing uitests/content.md so they show up in +# the same collapsible section of the AI summary comment. 
+ +Write-Host "" +Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan +Write-Host "║ STEP 3: RUN DETECTED UI TESTS ║" -ForegroundColor Cyan +Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan + +$uitestRunResult = "SKIPPED" +$uitestRunnerScript = Join-Path $PSScriptRoot "BuildAndRunHostApp.ps1" + +if ($uitestCategories -eq 'NONE') { + Write-Host " ⏭️ Skipped — detection returned NONE (no UI-relevant changes)" -ForegroundColor DarkGray +} elseif ([string]::IsNullOrWhiteSpace($uitestCategories)) { + Write-Host " ⏭️ Skipped — detection returned the run-all matrix (too expensive to run all categories locally)" -ForegroundColor DarkGray +} elseif (-not (Test-Path $uitestRunnerScript)) { + Write-Host " ⚠️ BuildAndRunHostApp.ps1 not found — cannot run UI tests" -ForegroundColor Yellow +} else { + # Mirror the regression-test platform fallback so a $Platform-less invocation + # still has a concrete target instead of silently picking nothing. + $uitestPlatform = if ($Platform) { $Platform } else { "android" } + + $categoryList = @($uitestCategories -split ',' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) + Write-Host " 🧪 Running $($categoryList.Count) detected UI category(ies) on '$uitestPlatform'…" -ForegroundColor Cyan + + $uitestRunOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests" + New-Item -ItemType Directory -Force -Path $uitestRunOutputDir | Out-Null + + $uitestPassed = 0 + $uitestFailed = 0 + $uitestSkipped = 0 + $uitestDetails = @() + + foreach ($cat in $categoryList) { + Write-Host "" + Write-Host " 📋 [$cat] Invoke-UITestWithRetry -Platform $uitestPlatform -Category $cat" -ForegroundColor Cyan + + # Delegate to the shared deploy+retry script so STEP 3 uses the + # SAME pre-boot + retry-on-env-error + device-reboot pipeline as + # the Gate (verify-tests-fail.ps1's Invoke-TestRun + + # Invoke-TestRunWithRetry). 
When the Android emulator/iOS sim + # rejects an install ("ADB0010 Broken pipe", XHarness exit 83, + # AppiumServerHasNotBeenStartedLocally, …) the helper retries up + # to 3 times with adb reboot / simctl boot recovery between + # attempts. Without this, a single transient install failure was + # turning into "119 OneTimeSetUp timeouts" in the AI summary. + $catLogPath = Join-Path $uitestRunOutputDir ("$cat-output.log") + $catStart = Get-Date + $sharedRunner = Join-Path $PSScriptRoot "shared/Invoke-UITestWithRetry.ps1" + $runResult = $null + $testOutput = @() + $testExitCode = -1 + $envErrHit = $null + try { + $runResult = & $sharedRunner ` + -Platform $uitestPlatform ` + -Category $cat ` + -RepoRoot $RepoRoot ` + -LogFile $catLogPath + if ($runResult) { + $testOutput = $runResult.Output + $testExitCode = $runResult.ExitCode + $envErrHit = $runResult.EnvErrorHit + Write-Host " Attempts: $($runResult.Attempts) · Exit: $testExitCode · EnvError: $envErrHit" -ForegroundColor Gray + $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } + } + } catch { + Write-Host " ⚠️ Shared runner threw: $_" -ForegroundColor Yellow + $testExitCode = -1 + } + $catDuration = [math]::Round(((Get-Date) - $catStart).TotalSeconds, 1) + + # Parse per-test results. We prefer the TRX file written by + # `dotnet test --logger trx` (mirrors CI pipeline 313's + # `RunTestWithLocalDotNet`) — it's authoritative because it captures + # every test's outcome, duration, error and stack regardless of + # how the console output got wrapped or interleaved. We only fall + # back to scraping the captured stdout via Get-DotNetTestResults + # when the TRX is missing (build/deploy crashed before tests ran, + # or an older BuildAndRunHostApp.ps1 ran without --logger trx). 
+ $perTestResults = @() + $trxAggregate = $null + $trxPath = if ($runResult) { [string]$runResult.TrxResultFile } else { $null } + if ($trxPath -and (Test-Path $trxPath)) { + try { + $trxAggregate = Get-TrxResults -TrxPath $trxPath + if ($trxAggregate) { + $perTestResults = @($trxAggregate.Results) + Write-Host " 📄 TRX parsed: total=$($trxAggregate.Total) passed=$($trxAggregate.Passed) failed=$($trxAggregate.Failed) skipped=$($trxAggregate.Skipped)" -ForegroundColor Cyan + } + } catch { + Write-Host " ⚠️ Failed to parse TRX $trxPath : $_" -ForegroundColor Yellow + } + } + if (-not $trxAggregate) { + try { + $perTestResults = @(Get-DotNetTestResults -Lines $testOutput) + } catch { + Write-Host " ⚠️ Failed to parse per-test results: $_" -ForegroundColor Yellow + } + } + $catFailedTests = @($perTestResults | Where-Object { $_.status -eq 'Failed' }) + $catPassedTests = @($perTestResults | Where-Object { $_.status -eq 'Passed' }) + # Authoritative aggregate counts: TRX > per-test array. (When the TRX + # is present, its aggregate counters (Total/Passed/Failed) beat counting + # per-test array items because VSTest may report retries/skips that aren't in + # the individual per-test result nodes.)
+ if ($trxAggregate) { + $catTotalCount = [int]$trxAggregate.Total + $catPassedCount = [int]$trxAggregate.Passed + $catFailedCount = [int]$trxAggregate.Failed + } else { + $catTotalCount = $perTestResults.Count + $catPassedCount = $catPassedTests.Count + $catFailedCount = $catFailedTests.Count + } + + if ($testExitCode -eq 0) { + Write-Host " ✅ PASSED ($catDuration s, $catPassedCount test(s))" -ForegroundColor Green + $uitestPassed++ + $uitestDetails += @{ + category = $cat + result = 'PASSED' + duration_s = $catDuration + tests_total = $catTotalCount + tests_passed = $catPassedCount + tests_failed = 0 + passed_tests = @($catPassedTests | ForEach-Object { @{ name = $_.name; duration = $_.duration } }) + failed_tests = @() + } + } elseif ($testExitCode -eq -1) { + Write-Host " ⏭️ SKIPPED" -ForegroundColor DarkGray + $uitestSkipped++ + $uitestDetails += @{ + category = $cat + result = 'SKIPPED' + duration_s = $catDuration + reason = 'Runner threw an exception' + tests_total = 0 + tests_passed = 0 + tests_failed = 0 + passed_tests = @() + failed_tests = @() + } + } else { + Write-Host " ❌ FAILED (exit code: $testExitCode, $catDuration s, $catFailedCount failed test(s))" -ForegroundColor Red + foreach ($ft in $catFailedTests) { + Write-Host " • $($ft.name)" -ForegroundColor Red + } + $uitestFailed++ + # When per-test parsing found no failures (e.g. build/deploy + # crashed before tests ran), capture the last 30 lines of the + # category's stdout so the AI summary can show the actual error + # (CS0246, RS0016, missing dependency, etc.) instead of just + # "exit code 1". 
+ $buildTail = $null + if ($catFailedCount -eq 0) { + try { + $tail = @($testOutput | ForEach-Object { "$_" } | Select-Object -Last 30) + $buildTail = ($tail -join "`n").Trim() + } catch { $buildTail = $null } + } + # Detect infrastructure-level failure: when ALL failures share a + # OneTimeSetUp timeout AND the build log shows the HostApp couldn't + # be installed/launched (ADB install failure, broken pipe, no + # device, etc.), this is a CI infra problem — not real test + # regressions. Reviewers shouldn't be alarmed by "119 failed tests" + # when the app never even started. + # + # If $envErrHit was set above, use that — the retry loop already + # detected an env error and exhausted retries. + # Load shared env-error patterns (single source of truth). + $sharedPatternsScript = Join-Path $PSScriptRoot "shared/Get-EnvErrorPatterns.ps1" + if (Test-Path $sharedPatternsScript) { + . $sharedPatternsScript + $infraSignals = Get-EnvErrorPatterns + } else { + $infraSignals = @( + 'InstallFailedException', + 'Failure calling service package', + 'ADB0010', + 'Broken pipe', + 'no devices/emulators found', + 'device offline', + 'Could not connect to device', + 'Failed to launch the application', + 'cmd: Failure' + ) + } + $infraReason = $envErrHit + if (-not $infraReason -and $catFailedTests.Count -gt 0) { + # Two equally-strong infra-failure indicators: + # (a) every failure is `OneTimeSetUp:` — driver couldn't + # reach the runner UI button. + # (b) the build itself failed (`Build FAILED`) and there + # are zero passes — NUnit then "fails" every test in + # the assembly because the HostApp APK never got + # installed. 
+ $logText = ($testOutput | ForEach-Object { "$_" }) -join "`n" + $allOneTimeSetup = @($catFailedTests | Where-Object { + ($_.error -as [string]) -match '^OneTimeSetUp:' + }).Count -eq $catFailedTests.Count + $buildFailedNoPasses = ($catPassedCount -eq 0) -and ($logText -match '(?m)^Build FAILED\.\s*$') + if ($allOneTimeSetup -or $buildFailedNoPasses) { + foreach ($sig in $infraSignals) { + if ($logText -match $sig) { + $infraReason = $sig + break + } + } + } + } + $uitestDetails += @{ + category = $cat + result = 'FAILED' + duration_s = $catDuration + exit_code = $testExitCode + tests_total = $catTotalCount + tests_passed = $catPassedCount + tests_failed = $catFailedCount + build_tail = $buildTail + infra_failure = $infraReason + trx_path = $trxPath + passed_tests = @($catPassedTests | ForEach-Object { @{ name = $_.name; duration = $_.duration } }) + failed_tests = @($catFailedTests | ForEach-Object { + @{ + name = $_.name + duration = $_.duration + error = $_.error + stack = $_.stack + } + }) + } + } + } + + if ($uitestFailed -gt 0) { + $uitestRunResult = "FAILED" + Write-Host "" + Write-Host " 🔴 UI test result: $uitestPassed passed, $uitestFailed FAILED, $uitestSkipped skipped" -ForegroundColor Red + } elseif ($uitestPassed -gt 0) { + $uitestRunResult = "PASSED" + Write-Host "" + Write-Host " ✅ UI test result: $uitestPassed passed, $uitestSkipped skipped" -ForegroundColor Green + } else { + $uitestRunResult = "SKIPPED" + Write-Host "" + Write-Host " ⏭️ All UI categories skipped ($uitestSkipped total)" -ForegroundColor DarkGray + } + + # Append a results table to the existing uitests/content.md so the same + # collapsible "UI Tests — Category Detection" section in the AI summary + # comment now contains both the detected list and the run results. 
+ $uitestContentFile = Join-Path $uitestRunOutputDir "content.md" + $appendMd = New-Object System.Text.StringBuilder + [void]$appendMd.AppendLine() + [void]$appendMd.AppendLine("### 🧪 UI Test Execution Results") + [void]$appendMd.AppendLine() + $resultIcon = switch ($uitestRunResult) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } + [void]$appendMd.AppendLine("$resultIcon **$uitestRunResult** — $uitestPassed passed, $uitestFailed failed, $uitestSkipped skipped (platform: ``$uitestPlatform``)") + [void]$appendMd.AppendLine() + if ($uitestDetails.Count -gt 0) { + [void]$appendMd.AppendLine("| Category | Result | Tests | Duration | Notes |") + [void]$appendMd.AppendLine("|---|---|---|---|---|") + foreach ($d in $uitestDetails) { + $icon = switch ($d.result) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } + # Tests column: e.g. "1/1 ✓" on pass, "0/1 (1 ❌)" on fail. When the + # category itself failed but no per-test failures were parsed (e.g. + # build/deploy crashed before tests ran), don't claim a green ✓ — + # show "build/deploy failed" so reviewers aren't misled. 
+ $tCount = if ($null -ne $d.tests_total) { [int]$d.tests_total } else { 0 } + $tPass = if ($null -ne $d.tests_passed) { [int]$d.tests_passed } else { 0 } + $tFail = if ($null -ne $d.tests_failed) { [int]$d.tests_failed } else { 0 } + $testsCol = if ($d.infra_failure) { + "🛠️ infra failure ($tFail bogus failures)" + } + elseif ($d.result -eq 'FAILED' -and $tFail -eq 0) { + if ($tCount -eq 0) { "build/deploy failed" } + else { "$tPass/$tCount — build/deploy failed before per-test results" } + } + elseif ($tCount -eq 0) { "—" } + elseif ($tFail -gt 0) { "$tPass/$tCount ($tFail ❌)" } + else { "$tPass/$tCount ✓" } + $notes = if ($d.infra_failure) { "infra: $($d.infra_failure)" } + elseif ($d.exit_code) { "exit code $($d.exit_code)" } + elseif ($d.reason) { $d.reason } + else { "" } + [void]$appendMd.AppendLine("| ``$($d.category)`` | $icon $($d.result) | $testsCol | $($d.duration_s)s | $notes |") + } + } + [void]$appendMd.AppendLine() + + # Per-failed-category breakdown: collapsible block with each failed test's + # name, error message, and first stack frame so a reviewer can diagnose + # without downloading the full build artifact. When a category failed but + # produced no per-test failures (build/deploy crashed), surface the last + # 30 lines of stdout so the AI summary still pinpoints the cause. + $failedCats = @($uitestDetails | Where-Object { $_.result -eq 'FAILED' -and (($_.failed_tests -and $_.failed_tests.Count -gt 0) -or $_.build_tail) }) + $infraCats = @($failedCats | Where-Object { $_.infra_failure }) + if ($infraCats.Count -gt 0) { + [void]$appendMd.AppendLine("> ⚠️ **Infrastructure failure detected** — for $($infraCats.Count) categor$(if ($infraCats.Count -eq 1) { 'y' } else { 'ies' }) below, the HostApp couldn't be installed or launched on the device (build/deploy failed). NUnit then reports every test in the assembly as failed. **These are NOT real test regressions** — the test runner never started. 
Look for ``$($infraCats[0].infra_failure)`` in the build log.") + [void]$appendMd.AppendLine() + } + if ($failedCats.Count -gt 0) { + [void]$appendMd.AppendLine("#### Failed test details") + [void]$appendMd.AppendLine() + foreach ($d in $failedCats) { + $hasFailedTests = $d.failed_tests -and $d.failed_tests.Count -gt 0 + $headSummary = if ($d.infra_failure) { + "🛠️ $($d.category) — infra failure ($($d.failed_tests.Count) bogus failures, app never installed)" + } elseif ($hasFailedTests) { + "❌ $($d.category) — $($d.failed_tests.Count) failed test$(if ($d.failed_tests.Count -ne 1) { 's' })" + } else { + "❌ $($d.category) — build/deploy failed (no per-test results)" + } + [void]$appendMd.AppendLine("
<details><summary>$headSummary</summary>") + # Each failed category is wrapped in <details>/<summary> so it renders as a collapsible section of the comment. + [void]$appendMd.AppendLine() + if ($hasFailedTests) { + # GitHub's comment body limit is 65,536 chars; large categories + # can have 100+ failures with multi-KB error messages each. + # Group by error message to dedup the common "OneTimeSetUp: + # Timed out…" cases (one root cause, N tests). Show full + # detail for the first 5 unique errors, then a compact list. + # @() wrap is required: Group-Object on a single unique key + # returns ONE GroupInfo (not an array), and `.Count` on a + # GroupInfo returns the size of the group, not the number of + # groups — without @() the foreach below would iterate the + # group's members instead of the groups themselves. + $byErr = @($d.failed_tests | Group-Object -Property { + if ($_.error) { ($_.error -as [string]).Substring(0, [Math]::Min(200, ([string]$_.error).Length)) } else { '' } + } | Sort-Object Count -Descending) + + $shownGroups = 0 + foreach ($g in $byErr) { + if ($shownGroups -ge 5) { + $remaining = ($byErr | Select-Object -Skip 5 | Measure-Object -Property Count -Sum).Sum + [void]$appendMd.AppendLine("…and $remaining more failure(s) with other error signatures (see CopilotLogs artifact for full detail).") + [void]$appendMd.AppendLine() + break + } + $shownGroups++ + + $first = $g.Group[0] + $count = $g.Count + if ($count -gt 1) { + $sampleNames = ($g.Group | Select-Object -First 3 | ForEach-Object { "``$($_.name)``" }) -join ', ' + $more = if ($count -gt 3) { ", … (+$($count - 3) more)" } else { '' } + [void]$appendMd.AppendLine("**$count tests failed with the same error** — e.g.
$sampleNames$more") + } else { + [void]$appendMd.AppendLine("**``$($first.name)``** *(took $($first.duration))*") + } + [void]$appendMd.AppendLine() + + $errBody = if ($first.error) { + $e = [string]$first.error + if ($e.Length -gt 1500) { $e.Substring(0, 1500) + "`n…(truncated)" } else { $e } + } else { "_(no error message captured)_" } + [void]$appendMd.AppendLine('```') + [void]$appendMd.AppendLine($errBody) + [void]$appendMd.AppendLine('```') + if ($first.stack) { + $firstFrame = ($first.stack -split "`n" | Where-Object { $_.Trim() } | Select-Object -First 1) + if ($firstFrame) { + # Strip only a literal leading "at " token: TrimStart('a','t',' ') treats its arguments as a character set and would also eat the start of frames such as "at tests.Foo()". + [void]$appendMd.AppendLine("> at $($firstFrame.Trim() -replace '^at\s+', '')") + [void]$appendMd.AppendLine() + } + } + } + + # Always print a compact name-only list of every failed test + # so reviewers know exactly which tests need to be re-run, + # even if their error matched a deduped group above. + if ($d.failed_tests.Count -gt 1) { + [void]$appendMd.AppendLine("
<details><summary>All $($d.failed_tests.Count) failed test names</summary>") + [void]$appendMd.AppendLine() + foreach ($ft in $d.failed_tests) { + [void]$appendMd.AppendLine("- ``$($ft.name)``") + } + [void]$appendMd.AppendLine() + [void]$appendMd.AppendLine("
</details>") + [void]$appendMd.AppendLine() + } + } + if ($d.build_tail) { + $tail = [string]$d.build_tail + if ($tail.Length -gt 3000) { $tail = $tail.Substring($tail.Length - 3000) } + [void]$appendMd.AppendLine("Last 30 lines of build/test stdout:") + [void]$appendMd.AppendLine() + [void]$appendMd.AppendLine('```') + [void]$appendMd.AppendLine($tail) + [void]$appendMd.AppendLine('```') + } + [void]$appendMd.AppendLine() + [void]$appendMd.AppendLine("</details>
") + [void]$appendMd.AppendLine() + } + } + + # Per-passed-category mini-summary: only emitted if there were ANY passed + # tests, so empty/skipped runs stay quiet. + $passedCats = @($uitestDetails | Where-Object { $_.passed_tests -and $_.passed_tests.Count -gt 0 -and $_.result -eq 'PASSED' }) + if ($passedCats.Count -gt 0) { + [void]$appendMd.AppendLine("
<details><summary>Show $(($passedCats | ForEach-Object { [int]$_.tests_passed } | Measure-Object -Sum).Sum) passed test name(s)</summary>") + # The per-category entries are hashtables, so tests_passed is projected explicitly before summing instead of relying on Measure-Object -Property resolving hashtable keys. + [void]$appendMd.AppendLine() + foreach ($d in $passedCats) { + [void]$appendMd.AppendLine("**``$($d.category)``**") + [void]$appendMd.AppendLine() + foreach ($pt in $d.passed_tests) { + [void]$appendMd.AppendLine("- ``$($pt.name)`` *($($pt.duration))*") + } + [void]$appendMd.AppendLine() + } + [void]$appendMd.AppendLine("</details>
") + [void]$appendMd.AppendLine() + } + [void]$appendMd.AppendLine("_Failures here are informational only — they do not block the gate or affect try-fix candidate scoring._") + Add-Content $uitestContentFile $appendMd.ToString() -Encoding UTF8 + + # JSON summary for downstream consumers / debugging. + @{ + result = $uitestRunResult + platform = $uitestPlatform + passed = $uitestPassed + failed = $uitestFailed + skipped = $uitestSkipped + details = $uitestDetails + } | ConvertTo-Json -Depth 4 | Set-Content (Join-Path $uitestRunOutputDir "test-results.json") -Encoding UTF8 + + # result.txt — one-line traceability marker (PASSED / FAILED / SKIPPED). + $uitestRunResult | Set-Content (Join-Path $uitestRunOutputDir "result.txt") -Encoding UTF8 +} + +# Restore the review branch in case BuildAndRunHostApp.ps1 (or any of its +# child invocations) detached HEAD or switched branches. git checkout $reviewBranch 2>$null | Out-Null if ($LASTEXITCODE -ne 0) { - Write-Host " ⚠️ Failed to restore review branch '$reviewBranch' after Step 0.5 — Step 1 may run against the wrong tree" -ForegroundColor Red + Write-Host " ⚠️ Failed to restore review branch '$reviewBranch' after Step 3 — subsequent steps may run against the wrong tree" -ForegroundColor Red } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 1: Gate - Test Before and After Fix (script, no copilot agent) +# STEP 4: REGRESSION CROSS-REFERENCE (script, no copilot agent) # ═════════════════════════════════════════════════════════════════════════════ +Write-Host "" +Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Cyan +Write-Host "║ STEP 4: REGRESSION CROSS-REFERENCE ║" -ForegroundColor Cyan +Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Cyan + +$regressionOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/regression-check" +$regressionScript = Join-Path $PSScriptRoot 
"Find-RegressionRisks.ps1" +if (Test-Path $regressionScript) { + try { + & $regressionScript -PRNumber $PRNumber -OutputDir $regressionOutputDir + $regressionResult = if (Test-Path (Join-Path $regressionOutputDir "result.txt")) { + (Get-Content (Join-Path $regressionOutputDir "result.txt") -Raw).Trim() + } else { 'UNKNOWN' } + + switch ($regressionResult) { + 'REVERT' { Write-Host " 🔴 Regression risks detected — see regression-check/content.md" -ForegroundColor Red } + 'OVERLAP' { Write-Host " 🟡 Overlaps with prior bug-fix PRs (lower risk)" -ForegroundColor Yellow } + 'CLEAN' { Write-Host " 🟢 No regression risk detected" -ForegroundColor Green } + default { Write-Host " ⚠️ Unexpected regression-check result: $regressionResult" -ForegroundColor Yellow } + } + } catch { + Write-Host " ⚠️ Regression check failed (non-fatal): $_" -ForegroundColor Yellow + # Write a fallback content.md so downstream steps don't break + New-Item -ItemType Directory -Force -Path $regressionOutputDir | Out-Null + "⚠️ Regression cross-reference failed: $_" | Set-Content (Join-Path $regressionOutputDir "content.md") -Encoding UTF8 + } +} else { + Write-Host " ⚠️ Find-RegressionRisks.ps1 not found" -ForegroundColor Yellow +} + +# --- Regression Test Execution (part of STEP 4) --- +$regressionTestResult = "SKIPPED" +$regressionRisksJson = Join-Path $regressionOutputDir "risks.json" +# Pre-initialize both variables: they are read again when the try-fix prompt is built, +# and would otherwise be unassigned (or inherit a stale outer-scope value) when +# risks.json is absent or the result is neither REVERT nor OVERLAP. +$risksData = $null +$regressionTests = @() +if (Test-Path $regressionRisksJson) { + try { + $risksData = Get-Content $regressionRisksJson -Raw -Encoding UTF8 | ConvertFrom-Json + } catch { + # Unparsable risks.json is treated the same as a missing one. + $risksData = $null + } +} + +if ($risksData -and ($risksData.result -eq 'REVERT' -or $risksData.result -eq 'OVERLAP')) { + # Collect regression tests from ALL risk entries (REVERT + OVERLAP) + $regressionTests = @() + foreach ($risk in @($risksData.risks | Where-Object { $_.regression_tests.Count -gt 0 })) { + foreach ($test in $risk.regression_tests) { + $regressionTests += [PSCustomObject]@{ + FixPR = $risk.recent_pr + Type = $test.type + TestName = $test.test_name +
Filter = $test.filter + ProjectPath = $test.project_path + Project = $test.project + Runner = $test.runner + } + } + } + + if ($regressionTests.Count -gt 0) { + Write-Host "" + Write-Host " 🧪 Running $($regressionTests.Count) regression test(s) from fix PRs…" -ForegroundColor Cyan + + $regrTestOutputDir = Join-Path $regressionOutputDir "test-results" + New-Item -ItemType Directory -Force -Path $regrTestOutputDir | Out-Null + + $regrTestPassed = 0 + $regrTestFailed = 0 + $regrTestSkipped = 0 + $regrTestDetails = @() + + $regrPlatform = if ($Platform) { $Platform } else { "android" } + $uiTestRunner = Join-Path $RepoRoot ".github/scripts/BuildAndRunHostApp.ps1" + $deviceTestRunner = Join-Path $RepoRoot ".github/skills/run-device-tests/scripts/Run-DeviceTests.ps1" + + foreach ($t in $regressionTests) { + Write-Host "" + Write-Host " 📋 [$($t.Type)] $($t.TestName) (from fix PR #$($t.FixPR))" -ForegroundColor Cyan + + try { + switch ($t.Type) { + 'UITest' { + if (Test-Path $uiTestRunner) { + Write-Host " 🖥️ Running UI test via BuildAndRunHostApp.ps1 -Platform $regrPlatform -TestFilter `"$($t.Filter)`"" -ForegroundColor Cyan + $testOutput = & $uiTestRunner -Platform $regrPlatform -TestFilter $t.Filter 2>&1 + $testExitCode = $LASTEXITCODE + $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } + } else { + Write-Host " ⚠️ BuildAndRunHostApp.ps1 not found" -ForegroundColor Yellow + $testExitCode = -1 + } + } + 'DeviceTest' { + if (Test-Path $deviceTestRunner) { + $dtProject = if ($t.Project) { $t.Project } else { 'Controls' } + Write-Host " 📱 Running device test via Run-DeviceTests.ps1 -Project $dtProject -Platform $regrPlatform -TestFilter `"$($t.Filter)`"" -ForegroundColor Cyan + $testOutput = & $deviceTestRunner -Project $dtProject -Platform $regrPlatform -TestFilter $t.Filter 2>&1 + $testExitCode = $LASTEXITCODE + $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } + } else { + Write-Host " ⚠️ Run-DeviceTests.ps1 not found" 
-ForegroundColor Yellow + $testExitCode = -1 + } + } + { $_ -eq 'UnitTest' -or $_ -eq 'XamlUnitTest' } { + if ($t.ProjectPath) { + $resolvedProj = Join-Path $RepoRoot $t.ProjectPath + Write-Host " 🧪 Running: dotnet test $($t.ProjectPath) --filter `"$($t.Filter)`"" -ForegroundColor Cyan + $testOutput = dotnet test $resolvedProj --filter $t.Filter --logger "console;verbosity=minimal" 2>&1 + $testExitCode = $LASTEXITCODE + $testOutput | Select-Object -Last 20 | ForEach-Object { Write-Host " $_" } + } else { + Write-Host " ⚠️ No project path for unit test" -ForegroundColor Yellow + $testExitCode = -1 + } + } + default { + Write-Host " ⚠️ Unknown test type: $($t.Type)" -ForegroundColor Yellow + $testExitCode = -1 + } + } + + if ($testExitCode -eq 0) { + Write-Host " ✅ PASSED" -ForegroundColor Green + $regrTestPassed++ + $regrTestDetails += @{ test = $t.TestName; fix_pr = $t.FixPR; type = $t.Type; result = 'PASSED' } + } elseif ($testExitCode -eq -1) { + Write-Host " ⏭️ SKIPPED" -ForegroundColor DarkGray + $regrTestSkipped++ + $regrTestDetails += @{ test = $t.TestName; fix_pr = $t.FixPR; type = $t.Type; result = 'SKIPPED'; reason = 'Runner not available' } + } else { + Write-Host " ❌ FAILED (exit code: $testExitCode)" -ForegroundColor Red + $regrTestFailed++ + $regrTestDetails += @{ test = $t.TestName; fix_pr = $t.FixPR; type = $t.Type; result = 'FAILED' } + } + } catch { + Write-Host " ⚠️ Error: $_" -ForegroundColor Yellow + $regrTestSkipped++ + $regrTestDetails += @{ test = $t.TestName; fix_pr = $t.FixPR; type = $t.Type; result = 'ERROR'; reason = "$_" } + } + } + + # Determine overall result + if ($regrTestFailed -gt 0) { + $regressionTestResult = "FAILED" + Write-Host " 🔴 Regression test result: $regrTestPassed passed, $regrTestFailed FAILED, $regrTestSkipped skipped" -ForegroundColor Red + } elseif ($regrTestPassed -gt 0) { + $regressionTestResult = "PASSED" + Write-Host " ✅ Regression test result: $regrTestPassed passed, $regrTestSkipped skipped" -ForegroundColor 
Green + } else { + $regressionTestResult = "SKIPPED" + Write-Host " ⏭️ All regression tests skipped ($regrTestSkipped total)" -ForegroundColor DarkGray + } + + # Append results to regression-check content.md + $regrContentFile = Join-Path $regressionOutputDir "content.md" + if (Test-Path $regrContentFile) { + $appendMd = New-Object System.Text.StringBuilder + [void]$appendMd.AppendLine() + [void]$appendMd.AppendLine("### 🧪 Regression Test Results") + [void]$appendMd.AppendLine() + $resultIcon = switch ($regressionTestResult) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } + [void]$appendMd.AppendLine("$resultIcon **$regressionTestResult** — $regrTestPassed passed, $regrTestFailed failed, $regrTestSkipped skipped") + [void]$appendMd.AppendLine() + if ($regrTestDetails.Count -gt 0) { + [void]$appendMd.AppendLine("| Fix PR | Test | Type | Result |") + [void]$appendMd.AppendLine("|---|---|---|---|") + foreach ($d in $regrTestDetails) { + $icon = switch ($d.result) { "PASSED" { "✅" }; "FAILED" { "❌" }; default { "⏭️" } } + [void]$appendMd.AppendLine("| #$($d.fix_pr) | $($d.test) | $($d.type) | $icon $($d.result) |") + } + } + Add-Content $regrContentFile $appendMd.ToString() -Encoding UTF8 + } + + # Write test results JSON + @{ + result = $regressionTestResult + passed = $regrTestPassed + failed = $regrTestFailed + skipped = $regrTestSkipped + details = $regrTestDetails + } | ConvertTo-Json -Depth 4 | Set-Content (Join-Path $regrTestOutputDir "test-results.json") -Encoding UTF8 + } +} + +# ═════════════════════════════════════════════════════════════════════════════ +# STEP 5: Gate - Test Before and After Fix (script, no copilot agent) +# ═════════════════════════════════════════════════════════════════════════════ + +# TEMP: Skip Gate (STEP 5) + Try-Fix (STEP 6) for fast iteration on the +# inline-stages architecture. 
Both phases are expensive (build the whole +# repo, run agents on multiple candidates) and we just need STEPs 1-4 + +# STEP 7 (post comment) to validate that detectedCategories / +# aiSummaryCommentId output variables flow through to the new +# RunDeepUITests + UpdateAISummaryComment stages. $skipGateAndTryFix is +# currently $false, so the wrapped steps run; set it to $true only while +# iterating, and delete the wrapper once the stages are validated end-to-end. +$skipGateAndTryFix = $false +if (-not $skipGateAndTryFix) { + Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Yellow -Write-Host "║ STEP 1: GATE — TEST VERIFICATION ║" -ForegroundColor Yellow +Write-Host "║ STEP 5: GATE — TEST VERIFICATION ║" -ForegroundColor Yellow Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Yellow $gateOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/gate" @@ -788,7 +1618,7 @@ if (-not $DryRun) { git checkout $reviewBranch 2>$null | Out-Null # ═════════════════════════════════════════════════════════════════════════════ -# STEP 2: PR Review (3-phase skill: Pre-Flight, Try-Fix, Report) +# STEP 6: PR Review (3-phase skill: Pre-Flight, Try-Fix, Report) # ═════════════════════════════════════════════════════════════════════════════ $gateStatusForPrompt = switch ($gateResult) { @@ -797,68 +1627,182 @@ $gateStatusForPrompt = switch ($gateResult) { default { "Gate ❌ FAILED — tests did NOT behave as expected." } } -$step2Prompt = @" -Run a multi-candidate PR review for PR #$PRNumber using the following flow.
+# Build regression test instruction for try-fix candidates +$regressionTestInstruction = "" +if ($risksData -and $regressionTests -and $regressionTests.Count -gt 0) { + $testLines = @() + foreach ($t in $regressionTests) { + switch ($t.Type) { + 'UITest' { $testLines += " - ``BuildAndRunHostApp.ps1 -Platform $regrPlatform -TestFilter `"$($t.Filter)`"`` (UITest from fix PR #$($t.FixPR))" } + 'DeviceTest' { $proj = if ($t.Project) { $t.Project } else { 'Controls' }; $testLines += " - ``Run-DeviceTests.ps1 -Project $proj -Platform $regrPlatform -TestFilter `"$($t.Filter)`"`` (DeviceTest from fix PR #$($t.FixPR))" } + 'UnitTest' { if ($t.ProjectPath) { $testLines += " - ``dotnet test $($t.ProjectPath) --filter `"$($t.Filter)`"`` (UnitTest from fix PR #$($t.FixPR))" } } + 'XamlUnitTest' { if ($t.ProjectPath) { $testLines += " - ``dotnet test $($t.ProjectPath) --filter `"$($t.Filter)`"`` (XamlUnitTest from fix PR #$($t.FixPR))" } } + } + } + if ($testLines.Count -gt 0) { + $regressionTestInstruction = @" + +## 🔴 REGRESSION TESTS (MANDATORY for every candidate) + +The regression cross-reference detected that this PR modifies files touched by prior bug-fix PRs. **Every try-fix candidate MUST run these additional tests** after its own test command passes. A candidate that passes its own tests but FAILS a regression test should be marked as ``Fail``. + +$($testLines -join "`n") + +Run these AFTER your primary test command succeeds. If any regression test fails, your candidate is ``Fail`` — the fix re-introduces a previously fixed bug. +"@ + } +} + +# ── STEP 6a: Try-Fix — iterative candidate generation (Copilot call 1) ──── +$step6aPrompt = @" +Generate alternative fix candidates for PR #$PRNumber using an iterative expert-review-and-test loop. ## Phase 1 — Pre-Flight (context only) -Use the pr-review skill's pre-flight phase to gather context. Do NOT modify code. +Use the pr-review skill's pre-flight phase to gather context about the issue and PR. Do NOT modify code. 
Write summary to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/pre-flight/content.md``. -## Phase 2 — Candidate generation (run BOTH branches; do not skip either) -Generate the following candidates. Each candidate is an alternative diff against the PR's base branch. Do this work in isolated worktrees / scratch copies so artifacts do NOT clobber each other. +## Phase 2 — Iterative Try-Fix loop +For each candidate, follow this cycle: + +1. **Generate** — Use the code-review skill with the maui-expert-reviewer agent to analyze the problem and generate a fix candidate. Each candidate must explore a DIFFERENT approach from the PR's current fix and from previous candidates. The expert reviewer provides domain-specific guidance for MAUI (handlers, platform specifics, layout, etc.). +2. **Test** — Run the candidate against the gate criteria and regression tests. Record pass/fail. +3. **Learn** — If the candidate failed, feed the failure details (test output, error messages) back to the expert reviewer to inform the next candidate. +4. **Repeat or stop** — Generate the next candidate incorporating lessons from failures. Stop when: + - A candidate passes ALL tests and is demonstrably better than the PR's fix, OR + - You've exhausted meaningfully different approaches (don't generate trivial variations) + +Number candidates sequentially (``try-fix-1``, ``try-fix-2``, ``try-fix-3``, ...). + +For each candidate: +- Write output to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/try-fix-{N}/content.md`` +- Include: approach description, diff, test results, failure analysis (if failed) + +Aggregate all try-fix narrative to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/try-fix/content.md``. +$regressionTestInstruction + +$platformInstruction +$autonomousRules + +**Gate result (already completed in a prior step):** $gateStatusForPrompt +Do NOT re-run gate verification. The gate phase is handled separately. 
+⚠️ Do NOT create or overwrite ``gate/content.md`` — it is already generated by the gate script with detailed test output. +"@ + +Invoke-CopilotStep -StepName "STEP 6a: TRY-FIX" -Prompt $step6aPrompt | Out-Null + +# Restore review branch between copilot calls +git checkout $reviewBranch 2>$null | Out-Null + +# Diagnostic: check what STEP 6a produced +Write-Host "" +Write-Host " 📊 STEP 6a output check:" -ForegroundColor Cyan +$tryFixDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent" +$tryFixContent = Join-Path $tryFixDir "try-fix/content.md" +$preFlightContent = Join-Path $tryFixDir "pre-flight/content.md" +if (Test-Path $preFlightContent) { + $pfSize = (Get-Item $preFlightContent).Length + Write-Host " ✅ pre-flight/content.md ($pfSize bytes)" -ForegroundColor Green +} else { + Write-Host " ❌ pre-flight/content.md MISSING" -ForegroundColor Red +} +if (Test-Path $tryFixContent) { + $tfSize = (Get-Item $tryFixContent).Length + Write-Host " ✅ try-fix/content.md ($tfSize bytes)" -ForegroundColor Green +} else { + Write-Host " ⚠️ try-fix/content.md not found (agent may not have written it)" -ForegroundColor Yellow +} +$tryFixDirs = Get-ChildItem -Path $tryFixDir -Directory -Filter "try-fix-*" -ErrorAction SilentlyContinue +if ($tryFixDirs) { + Write-Host " 📁 Try-fix candidates: $($tryFixDirs.Count) ($($tryFixDirs.Name -join ', '))" -ForegroundColor Cyan +} else { + Write-Host " ⚠️ No try-fix-N directories found" -ForegroundColor Yellow +} + +# ── STEP 6b: Expert Review of PR fix + final comparison (Copilot call 2) ── +$step6bPrompt = @" +Run expert code review of PR #$PRNumber's fix and compare against all try-fix candidates from STEP 6a. 
+ +Read context from: +- ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/pre-flight/content.md`` +- ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/try-fix/content.md`` (and individual try-fix-{N}/content.md files) -### Branch A — Expert reviewer evaluation of the current PR fix (in sandbox) +## Phase 1 — Expert reviewer evaluation of the PR fix Use the code-review skill with the maui-expert-reviewer agent to evaluate the PR's existing fix. Apply the reviewer's actionable feedback in a sandbox copy and treat the result as a candidate named ``pr-plus-reviewer``. - Always also write the raw inline findings to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/inline-findings.json`` (these are file:line findings against the PR's diff and feed the inline-comment posting step). - Write candidate output to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/expert-pr-eval/content.md``. -### Branch B — Try-Fix ×4 (ALWAYS runs — do NOT skip) -Use the pr-review skill's try-fix phase to generate FOUR independent candidate fixes (``try-fix-1`` through ``try-fix-4``). Each candidate must load domain knowledge from a different maui-expert-reviewer dimension so the candidates are diverse. -- 🚨 You MUST generate all four candidates. Do not short-circuit even if Pre-Flight or the expert eval suggests the PR is already correct. -- Write each candidate's output to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/try-fix-{N}/content.md`` (N = 1..4). -- Aggregate try-fix narrative for the AI summary comment to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/try-fix/content.md``. - -## Phase 3 — Report -The expert reviewer evaluates ALL candidates against each other: +## Phase 2 — Comparative Report +Compare ALL candidates: - ``pr`` (the raw PR fix as submitted) -- ``pr-plus-reviewer`` (PR fix + reviewer feedback applied in sandbox) -- ``try-fix-1``..``try-fix-4`` -Pick the single winning candidate. 
+- ``pr-plus-reviewer`` (PR fix + expert reviewer feedback applied) +- All ``try-fix-N`` candidates from STEP 6a +Pick the single winning candidate. **Candidates that failed regression tests MUST be ranked lower than candidates that passed them.** Write the comparative analysis to ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/report/content.md``. -## Phase 4 — Winner manifest (REQUIRED) +## Phase 3 — Winner manifest (REQUIRED) Write ``CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/winner.json`` with this exact schema: ``````json { "schemaVersion": 1, - "winner": "pr" | "pr-plus-reviewer" | "try-fix-1" | "try-fix-2" | "try-fix-3" | "try-fix-4", + "winner": "pr" | "pr-plus-reviewer" | "try-fix-N", "isPRFix": true | false, "summary": "1-3 sentence rationale for why this candidate won", - "candidateDiff": "" + "candidateDiff": "" } `````` Rules: - ``isPRFix`` MUST be ``true`` when ``winner`` is ``pr`` or ``pr-plus-reviewer``. - ``isPRFix`` MUST be ``false`` when ``winner`` is any ``try-fix-*``. -- When ``isPRFix`` is ``false``, ``candidateDiff`` MUST be a non-empty unified diff. Truncate at 55 KB if larger and end with a ``... [truncated]`` marker line. +- When ``isPRFix`` is ``false``, ``candidateDiff`` MUST be a non-empty unified diff. $platformInstruction $autonomousRules -**Gate result (already completed in a prior step):** $gateStatusForPrompt -Do NOT re-run gate verification. The gate phase is handled separately. -⚠️ Do NOT create or overwrite ``gate/content.md`` — it is already generated by the gate script with detailed test output. +**Gate result:** $gateStatusForPrompt +Do NOT re-run gate verification. 
"@ -Invoke-CopilotStep -StepName "STEP 2: PR REVIEW" -Prompt $step2Prompt | Out-Null +Invoke-CopilotStep -StepName "STEP 6b: EXPERT REVIEW + COMPARE" -Prompt $step6bPrompt | Out-Null + +# Diagnostic: check what STEP 6b produced +Write-Host "" +Write-Host " 📊 STEP 6b output check:" -ForegroundColor Cyan +$expertEvalContent = Join-Path $tryFixDir "expert-pr-eval/content.md" +$reportContent = Join-Path $tryFixDir "report/content.md" +$winnerFile = Join-Path $tryFixDir "winner.json" +$inlineFindings = Join-Path $tryFixDir "inline-findings.json" +if (Test-Path $expertEvalContent) { + $eeSize = (Get-Item $expertEvalContent).Length + Write-Host " ✅ expert-pr-eval/content.md ($eeSize bytes)" -ForegroundColor Green +} else { + Write-Host " ❌ expert-pr-eval/content.md MISSING — expert review did not complete" -ForegroundColor Red +} +if (Test-Path $reportContent) { + $rpSize = (Get-Item $reportContent).Length + Write-Host " ✅ report/content.md ($rpSize bytes)" -ForegroundColor Green +} else { + Write-Host " ❌ report/content.md MISSING — comparative report not written" -ForegroundColor Red +} +if (Test-Path $winnerFile) { + $winnerJson = Get-Content -Raw $winnerFile | ConvertFrom-Json -ErrorAction SilentlyContinue + Write-Host " 🏆 winner.json: winner=$($winnerJson.winner) isPRFix=$($winnerJson.isPRFix)" -ForegroundColor Green +} else { + Write-Host " ❌ winner.json MISSING — no winner determined" -ForegroundColor Red +} +if (Test-Path $inlineFindings) { + $ifSize = (Get-Item $inlineFindings).Length + Write-Host " ✅ inline-findings.json ($ifSize bytes)" -ForegroundColor Green +} else { + Write-Host " ⚠️ inline-findings.json not found" -ForegroundColor Yellow +} # Restore review branch — the Copilot agent may have switched branches (e.g. via gh pr checkout) git checkout $reviewBranch 2>$null | Out-Null # ─── Tier 3 refresh: feed AI categories back into category detection ─── -# Step 0.5 ran detection without the AI tier (-AiCategories was empty). 
-# Pre-flight (Step 2) wrote `ai-categories.md`; re-run detection now so the -# unified comment reflects all three tiers before Step 3 posts. +# Step 2 ran detection without the AI tier (-AiCategories was empty). +# Pre-flight (Step 6) wrote `ai-categories.md`; re-run detection now so the +# unified comment reflects all three tiers before Step 7 posts. $aiCategoriesFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests/ai-categories.md" if ((Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { try { @@ -877,31 +1821,71 @@ if ((Test-Path $detectScript) -and (Test-Path $aiCategoriesFile)) { } } + # Re-emit the AzDO output variable so Stage 2 (RunDeepUITests) + # picks up the AI-refreshed category list, not the pre-AI one. + if ($refreshedCategories -ne $uitestCategories) { + $refreshedForOutput = if ($refreshedCategories -eq 'NONE') { 'NONE' } + elseif ([string]::IsNullOrWhiteSpace($refreshedCategories)) { 'ALL' } + else { $refreshedCategories } + Write-Host "##vso[task.setvariable variable=detectedCategories;isOutput=true]$refreshedForOutput" + Write-Host " 🔁 Updated detectedCategories output: $refreshedForOutput" -ForegroundColor Green + } + $uitestOutputDir = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/uitests" + $uitestContentFile = Join-Path $uitestOutputDir "content.md" + + # Preserve any STEP 3 results table that was appended earlier so + # the post-comment phase keeps the actual run output (categories + + # execution table) instead of just the refreshed category list. + $preservedExecution = "" + if (Test-Path $uitestContentFile) { + $existing = Get-Content $uitestContentFile -Raw + $marker = '### 🧪 UI Test Execution Results' + $idx = $existing.IndexOf($marker) + if ($idx -ge 0) { + $preservedExecution = $existing.Substring($idx) + } + } + if ($refreshedCategories -eq 'NONE') { - "No UI test categories needed for this PR (no UI-relevant changes)." 
| Set-Content (Join-Path $uitestOutputDir "content.md") -Encoding UTF8 + "No UI test categories needed for this PR (no UI-relevant changes)." | Set-Content $uitestContentFile -Encoding UTF8 } elseif ([string]::IsNullOrWhiteSpace($refreshedCategories)) { - "Full UI test matrix will run (no specific categories detected from PR changes)." | Set-Content (Join-Path $uitestOutputDir "content.md") -Encoding UTF8 + "Full UI test matrix will run (no specific categories detected from PR changes)." | Set-Content $uitestContentFile -Encoding UTF8 } else { - "**Detected UI test categories:** ``$refreshedCategories``" | Set-Content (Join-Path $uitestOutputDir "content.md") -Encoding UTF8 + "**Detected UI test categories:** ``$refreshedCategories``" | Set-Content $uitestContentFile -Encoding UTF8 + } + + if (-not [string]::IsNullOrWhiteSpace($preservedExecution)) { + Add-Content $uitestContentFile "`n$preservedExecution" -Encoding UTF8 } } } catch { - Write-Host " ⚠️ AI-tier category refresh failed (non-fatal, keeping Step 0.5 result): $_" -ForegroundColor Yellow + Write-Host " ⚠️ AI-tier category refresh failed (non-fatal, keeping Step 2 result): $_" -ForegroundColor Yellow } } +} # END TEMP SKIP wrapper for STEP 5 (Gate) + STEP 6 (Try-Fix) — see $skipGateAndTryFix above + # ═════════════════════════════════════════════════════════════════════════════ -# STEP 3: Post AI Summary Comment (direct script invocation) +# STEP 7: Post AI Summary Comment (direct script invocation) +# When DEFER_COMMENT_TO_STAGE3=true, skip posting here — Stage 3 +# (UpdateAISummaryComment) will post the full comment after deep tests. 
# ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Magenta -Write-Host "║ STEP 3: POST AI SUMMARY ║" -ForegroundColor Magenta +Write-Host "║ STEP 7: POST AI SUMMARY ║" -ForegroundColor Magenta Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Magenta $summaryScriptsDir = Join-Path $RepoRoot ".github/scripts" +if ($env:DEFER_COMMENT_TO_STAGE3 -eq 'true') { + Write-Host " ⏭️ Deferred to Stage 3 (DEFER_COMMENT_TO_STAGE3=true)" -ForegroundColor Gray + Write-Host " ℹ️ Content files saved in CopilotLogs artifact" -ForegroundColor Gray + # Still emit a dummy output var so Stage 3 condition works + Write-Host "##vso[task.setvariable variable=aiSummaryCommentId;isOutput=true]DEFERRED" +} else { + # Post PR review phases (pre-flight, try-fix, report) $aiSummaryCommentId = $null $reviewScript = Join-Path $summaryScriptsDir "post-ai-summary-comment.ps1" @@ -918,6 +1902,15 @@ if (Test-Path $reviewScript) { if ($idLine -match '^COMMENT_ID=(\d+)$') { $aiSummaryCommentId = $Matches[1] Write-Host " ✅ PR review summary posted (comment ID: $aiSummaryCommentId)" -ForegroundColor Green + + # Persist comment ID + PR number to a known location and emit + # as an output variable so the downstream UpdateAISummaryComment + # stage in ci-copilot.yml can rewrite the STEP 3 section once + # the deep UI tests finish on the platform-pool agents. 
+ $commentIdFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/ai-summary-comment-id.txt" + New-Item -ItemType Directory -Force -Path (Split-Path -Parent $commentIdFile) | Out-Null + $aiSummaryCommentId | Set-Content $commentIdFile -Encoding UTF8 + Write-Host "##vso[task.setvariable variable=aiSummaryCommentId;isOutput=true]$aiSummaryCommentId" } else { Write-Host " ✅ PR review summary posted" -ForegroundColor Green } @@ -928,6 +1921,8 @@ if (Test-Path $reviewScript) { Write-Host " ⚠️ post-ai-summary-comment.ps1 not found — skipping review summary" -ForegroundColor Yellow } +} # END DEFER_COMMENT_TO_STAGE3 else block (summary comment only — inline findings + labels always run below) + # Determine winning candidate (winner.json) — drives whether we post inline findings or request changes $winnerFile = Join-Path $RepoRoot "CustomAgentLogsTmp/PRState/$PRNumber/PRAgent/winner.json" $winner = $null @@ -1061,12 +2056,12 @@ $( if ($truncated) { "`n_The diff was truncated to fit GitHub's review body limi } # ═════════════════════════════════════════════════════════════════════════════ -# STEP 4: Apply Labels +# STEP 8: Apply Labels # ═════════════════════════════════════════════════════════════════════════════ Write-Host "" Write-Host "╔═══════════════════════════════════════════════════════════╗" -ForegroundColor Blue -Write-Host "║ STEP 4: APPLY LABELS ║" -ForegroundColor Blue +Write-Host "║ STEP 8: APPLY LABELS ║" -ForegroundColor Blue Write-Host "╚═══════════════════════════════════════════════════════════╝" -ForegroundColor Blue $labelHelperPath = Join-Path $RepoRoot ".github/scripts/shared/Update-AgentLabels.ps1" diff --git a/.github/scripts/post-ai-summary-comment.ps1 b/.github/scripts/post-ai-summary-comment.ps1 index 6c5781e4bce9..5e375790fe02 100644 --- a/.github/scripts/post-ai-summary-comment.ps1 +++ b/.github/scripts/post-ai-summary-comment.ps1 @@ -67,11 +67,12 @@ if (-not (Test-Path $PRAgentDir)) { } $phases = [ordered]@{ - "uitests" = 
@{ File = "uitests/content.md"; Icon = "🧪"; Title = "UI Tests — Category Detection" } - "pre-flight" = @{ File = "pre-flight/content.md"; Icon = "🔍"; Title = "Pre-Flight — Context & Validation" } - "code-review" = @{ File = "pre-flight/code-review.md"; Icon = "🔬"; Title = "Code Review — Deep Analysis" } - "try-fix" = @{ File = "try-fix/content.md"; Icon = "🔧"; Title = "Fix — Analysis & Comparison" } - "report" = @{ File = "report/content.md"; Icon = "📋"; Title = "Report — Final Recommendation" } + "uitests" = @{ File = "uitests/content.md"; Icon = "🧪"; Title = "UI Tests" } + "regression-check" = @{ File = "regression-check/content.md"; Icon = "🔍"; Title = "Regression Cross-Reference" } + "pre-flight" = @{ File = "pre-flight/content.md"; Icon = "🔍"; Title = "Pre-Flight — Context & Validation" } + "code-review" = @{ File = "pre-flight/code-review.md"; Icon = "🔬"; Title = "Code Review — Deep Analysis" } + "try-fix" = @{ File = "try-fix/content.md"; Icon = "🔧"; Title = "Fix — Analysis & Comparison" } + "report" = @{ File = "report/content.md"; Icon = "📋"; Title = "Report — Final Recommendation" } } # ─── Gate content (rendered first, always open) ─── @@ -84,8 +85,7 @@ if (Test-Path $gateFilePath) { $gateSection = @"
🚦 Gate — Test Before & After Fix - ---- +
$gateContent @@ -108,11 +108,18 @@ foreach ($key in $phases.Keys) { $content = Get-Content $filePath -Raw -Encoding UTF8 if (-not [string]::IsNullOrWhiteSpace($content)) { Write-Host " ✅ $key ($((Get-Item $filePath).Length) bytes)" -ForegroundColor Green + # For uitests, make title dynamic: "UI Tests — Cat1, Cat2" + $phaseTitle = "$($phase.Icon) $($phase.Title)" + if ($key -eq "uitests") { + $catMatch = [regex]::Match($content, 'Detected UI test categories:\*\*\s*`{1,2}([^`]+)`{1,2}') + if ($catMatch.Success) { + $phaseTitle = "$($phase.Icon) $($phase.Title) — $($catMatch.Groups[1].Value)" + } + } $phaseSections += @"
-$($phase.Icon) $($phase.Title) - ---- +$phaseTitle +
$content @@ -172,8 +179,7 @@ $newSessionBlock = @" $sessionMarkerStart
📊 Review Session$commitSha7 · $commitTitle · $timestamp - ---- +
$phaseContent diff --git a/.github/scripts/post-inline-review.ps1 b/.github/scripts/post-inline-review.ps1 index e4fe45f509df..6b77ae0b76b0 100644 --- a/.github/scripts/post-inline-review.ps1 +++ b/.github/scripts/post-inline-review.ps1 @@ -80,7 +80,38 @@ if (-not (Test-Path $FindingsFile)) { # ============================================================================ Write-Host "Loading findings from: $FindingsFile" -ForegroundColor Cyan -$findings = Get-Content -Path $FindingsFile -Raw -Encoding UTF8 | ConvertFrom-Json +$rawJson = Get-Content -Path $FindingsFile -Raw -Encoding UTF8 +$parsed = $rawJson | ConvertFrom-Json + +# Diagnostic: log what the parser sees ($parsed may be $null, e.g. for an empty JSON array on PowerShell 7) +Write-Host " Parsed type: $(if ($null -ne $parsed) { $parsed.GetType().FullName } else { '(null)' })" -ForegroundColor Gray +if ($parsed -is [System.Management.Automation.PSCustomObject]) { + Write-Host " Object properties: $(($parsed.PSObject.Properties | ForEach-Object { $_.Name }) -join ', ')" -ForegroundColor Gray +} + +# The agent may produce: +# 1. A bare array [...] of findings +# 2. An object wrapper {"findings": [...]} or {"schemaVersion":1, "findings":[...]} +# 3. An object wrapper {"items": [...]} +# 4. A single finding object {...} +# Detect and unwrap all forms robustly; a $null parse result means there are no findings.
+$findings = @() +if ($null -eq $parsed) { $findings = @() } elseif ($parsed -is [System.Collections.IEnumerable] -and $parsed -isnot [string]) { + # Already an array + $findings = @($parsed) +} elseif ($parsed.PSObject.Properties.Match('findings').Count -gt 0 -and $null -ne $parsed.findings) { + # Object wrapper with explicit 'findings' property + $findings = @($parsed.findings) +} elseif ($parsed.PSObject.Properties.Match('items').Count -gt 0 -and $null -ne $parsed.items) { + # Alternative wrapper with 'items' property + $findings = @($parsed.items) +} elseif ($parsed.PSObject.Properties.Match('file').Count -gt 0 -or $parsed.PSObject.Properties.Match('path').Count -gt 0) { + # Single finding object — wrap in array + $findings = @($parsed) +} else { + Write-Host " ⚠️ Unrecognized findings format — dumping first 200 chars:" -ForegroundColor Yellow + Write-Host " $($rawJson.Substring(0, [Math]::Min(200, $rawJson.Length)))" -ForegroundColor Gray +} if (-not $findings -or $findings.Count -eq 0) { Write-Host "No findings to post." -ForegroundColor Green @@ -88,6 +119,7 @@ if (-not $findings -or $findings.Count -eq 0) { } Write-Host " Found $($findings.Count) inline findings" -ForegroundColor Gray +Write-Host " First finding keys: $(($findings[0].PSObject.Properties | ForEach-Object { $_.Name }) -join ', ')" -ForegroundColor Gray # Load summary if available $summaryBody = "" @@ -120,7 +152,7 @@ foreach ($f in $findings) { # Defense-in-depth: reject suspicious paths so a malformed/hostile finding # cannot poison the whole review post (especially in the fallback branch # below where the GitHub diff fetch failed and we can't cross-validate).
- $p = [string]$f.path + $p = if ($f.path) { [string]$f.path } elseif ($f.file) { [string]$f.file } else { '' } if ([string]::IsNullOrWhiteSpace($p) -or $p.Contains('..') -or $p.StartsWith('/') -or @@ -135,7 +167,7 @@ foreach ($f in $findings) { $comment = @{ path = $p line = [int]$f.line - body = $f.body + body = if ($f.body) { [string]$f.body } elseif ($f.message) { [string]$f.message } elseif ($f.content) { [string]$f.content } else { "(no description)" } } # GitHub API requires 'side' for pull request review comments $comment['side'] = 'RIGHT' diff --git a/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 b/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 new file mode 100644 index 000000000000..077556a88a95 --- /dev/null +++ b/.github/scripts/shared/Aggregate-UITestArtifacts.Tests.ps1 @@ -0,0 +1,169 @@ +#!/usr/bin/env pwsh +#Requires -Modules Pester +<# +.SYNOPSIS + Pester tests for Aggregate-UITestArtifacts.ps1. + + The script downloads AzDO artifacts and parses TRX files. We don't + actually call AzDO in tests — instead we lay out a fake artifact + directory tree and exercise the TRX-parsing + aggregation paths, + plus the artifact-name → category extraction helper. + +.EXAMPLE + Invoke-Pester ./Aggregate-UITestArtifacts.Tests.ps1 -Output Detailed +#> + +BeforeAll { + $script:scriptPath = Join-Path $PSScriptRoot 'Aggregate-UITestArtifacts.ps1' + $script:fixtureRoot = Join-Path ([System.IO.Path]::GetTempPath()) "agg-fixtures-$(New-Guid)" + New-Item -ItemType Directory -Path $script:fixtureRoot -Force | Out-Null + + # Helper to write a synthetic TRX with given totals + per-test results. 
+ function New-TrxFixture { + param( + [string]$Path, + [int]$Total, + [int]$Passed, + [int]$Failed, + [int]$Skipped = 0, + [string[]]$PassedTests = @(), + [string[]]$FailedTests = @() + ) + $executed = $Total - $Skipped + $passedXml = ($PassedTests | ForEach-Object { + " " + }) -join "`n" + $failedXml = ($FailedTests | ForEach-Object { + " boom" + }) -join "`n" + @" + + + + + + +$passedXml +$failedXml + + +"@ | Set-Content -Path $Path -Encoding UTF8 + } + + # Helper to extract a function from the script under test (mirrors the + # extraction pattern Review-PR.Tests.ps1 uses). + function Get-FunctionBody { + param([string]$ScriptText, [string]$FunctionName) + $start = $ScriptText.IndexOf("function $FunctionName") + if ($start -lt 0) { throw "Function '$FunctionName' not found" } + $i = $ScriptText.IndexOf('{', $start) + $depth = 0; $end = -1 + for (; $i -lt $ScriptText.Length; $i++) { + $c = $ScriptText[$i] + if ($c -eq '{') { $depth++ } + elseif ($c -eq '}') { $depth--; if ($depth -eq 0) { $end = $i; break } } + } + return $ScriptText.Substring($start, $end - $start + 1) + } + + $aggSrc = Get-Content -Raw -Path $script:scriptPath + Invoke-Expression (Get-FunctionBody -ScriptText $aggSrc -FunctionName 'Get-CategoryFromArtifactName') + Invoke-Expression (Get-FunctionBody -ScriptText $aggSrc -FunctionName 'Get-AggregatedTrxFromDirectory') + + # Get-AggregatedTrxFromDirectory needs Get-TrxResults — extract it from + # Review-PR.ps1 the same way the script-under-test does. 
+ $reviewSrc = Get-Content -Raw -Path (Join-Path (Split-Path -Parent $PSScriptRoot) 'Review-PR.ps1') + $fnMatch = [regex]::Match($reviewSrc, '(?ms)^function\s+Get-TrxResults\s*\{.*?^\}', 'Multiline') + Invoke-Expression $fnMatch.Value +} + +AfterAll { + Remove-Item -Path $script:fixtureRoot -Recurse -Force -ErrorAction SilentlyContinue +} + +Describe 'Get-CategoryFromArtifactName' { + It 'extracts CollectionView from android stage drop name' { + $r = Get-CategoryFromArtifactName -ArtifactName 'drop-android_ui_tests-android_ui_tests_controls_30 CollectionView-1' + $r | Should -Match 'CollectionView' + } + It 'extracts category from ios mono stage drop name' { + $r = Get-CategoryFromArtifactName -ArtifactName 'drop-ios_ui_tests_mono-ios_ui_tests_mono_controls_latest Editor-1' + $r | Should -Match 'Editor' + } + It 'extracts category from winui stage drop name' { + $r = Get-CategoryFromArtifactName -ArtifactName 'drop-winui_ui_tests-winui_ui_tests_controls Label-2' + $r | Should -Match 'Label' + } + It 'returns the artifact tail when prefix is unknown' { + $r = Get-CategoryFromArtifactName -ArtifactName 'unknown_stage_foo' + $r | Should -Be 'unknown_stage_foo' + } +} + +Describe 'Get-AggregatedTrxFromDirectory (TRX walk + merge)' { + BeforeAll { + $script:trxRoot = Join-Path $script:fixtureRoot 'agg-test' + New-Item -ItemType Directory -Path $script:trxRoot -Force | Out-Null + + $cv = Join-Path $script:trxRoot 'drop-android_ui_tests-android_ui_tests_controls_30 CollectionView-1' + New-Item -ItemType Directory -Path $cv -Force | Out-Null + New-TrxFixture -Path (Join-Path $cv 'cv.trx') ` + -Total 619 -Passed 75 -Failed 544 ` + -PassedTests @('Test1','Test2') -FailedTests @('Test3','Test4') + + $ed = Join-Path $script:trxRoot 'drop-android_ui_tests-android_ui_tests_controls_30 Editor-1' + New-Item -ItemType Directory -Path $ed -Force | Out-Null + New-TrxFixture -Path (Join-Path $ed 'editor.trx') ` + -Total 119 -Passed 51 -Failed 68 ` + -PassedTests @('EditTest1') 
-FailedTests @('EditTest2') + } + + It 'aggregates per-category counts from a tree of drop-* artifact dirs' { + $r = Get-AggregatedTrxFromDirectory -RootDir $script:trxRoot + $r.Keys.Count | Should -Be 2 + + # Find the CollectionView bucket + $cvKey = $r.Keys | Where-Object { $_ -match 'CollectionView' } | Select-Object -First 1 + $cvKey | Should -Not -BeNullOrEmpty + $r[$cvKey].Total | Should -Be 619 + $r[$cvKey].Passed | Should -Be 75 + $r[$cvKey].Failed | Should -Be 544 + + $edKey = $r.Keys | Where-Object { $_ -match 'Editor' } | Select-Object -First 1 + $edKey | Should -Not -BeNullOrEmpty + $r[$edKey].Total | Should -Be 119 + $r[$edKey].Passed | Should -Be 51 + $r[$edKey].Failed | Should -Be 68 + } + + It 'sums multiple TRX files for the same category' { + $double = Join-Path $script:fixtureRoot 'double-test' + New-Item -ItemType Directory -Path $double -Force | Out-Null + $catDir = Join-Path $double 'drop-android_ui_tests-android_ui_tests_controls_30 Label-1' + New-Item -ItemType Directory -Path $catDir -Force | Out-Null + New-TrxFixture -Path (Join-Path $catDir 'a.trx') -Total 50 -Passed 40 -Failed 10 + New-TrxFixture -Path (Join-Path $catDir 'b.trx') -Total 20 -Passed 15 -Failed 5 + + $r = Get-AggregatedTrxFromDirectory -RootDir $double + $r.Keys.Count | Should -Be 1 + $key = @($r.Keys)[0] + $r[$key].Total | Should -Be 70 # 50+20 + $r[$key].Passed | Should -Be 55 # 40+15 + $r[$key].Failed | Should -Be 15 # 10+5 + $r[$key].TrxPaths.Count | Should -Be 2 + } + + It 'returns empty hashtable when no TRX files are present' { + $empty = Join-Path $script:fixtureRoot 'empty-test' + New-Item -ItemType Directory -Path $empty -Force | Out-Null + $r = Get-AggregatedTrxFromDirectory -RootDir $empty + $r | Should -BeOfType [hashtable] + $r.Count | Should -Be 0 + } + + It 'returns empty hashtable when RootDir does not exist' { + $r = Get-AggregatedTrxFromDirectory -RootDir '/does/not/exist/anywhere' + $r | Should -BeOfType [hashtable] + $r.Count | Should -Be 0 + } +} diff 
--git a/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 b/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 new file mode 100644 index 000000000000..306a5ff548bb --- /dev/null +++ b/.github/scripts/shared/Aggregate-UITestArtifacts.ps1 @@ -0,0 +1,193 @@ +<# +.SYNOPSIS + Download AzDO build artifacts from a ci-copilot-uitests child build, + parse all TRX files, and merge them into per-category aggregates the + Review-PR.ps1 STEP 3 renderer expects. + +.DESCRIPTION + The child pipeline (eng/pipelines/ci-copilot-uitests.yml) publishes + one drop-* artifact per matrix job (one job per detected category per + platform) via PublishBuildArtifacts@1 in ui-tests-steps.yml. Each + artifact contains the TRX file from `dotnet test --logger trx`. + + This script: + 1. Lists artifacts on the build (filtered to drop-* + ui-tests-samples). + 2. Downloads them into a temp dir. + 3. Walks all .trx files. + 4. Calls Get-TrxResults (dot-sourced from Get-TrxResults.ps1 in this + script's own directory) to parse each one. + 5. Merges results by category. The category for each TRX is derived + from the artifact name (drop-<stage>-<job>-<attempt> where job + contains the CATEGORYGROUP matrix variable). + + Returns a hashtable keyed by category name. Each value matches the + shape returned by Get-TrxResults so the existing renderer in + Review-PR.ps1 just needs the per-category dict. + +.PARAMETER BuildId + AzDO build ID returned by Wait-CopilotUITests. + +.PARAMETER OutputDir + Where to download artifacts. Defaults to a temp folder. + +.PARAMETER ScriptDir + Path to .github/scripts; Review-PR.ps1 must exist there or the script + throws. Defaults to the parent of this script's directory.
+#> +param( + [Parameter(Mandatory=$true)] + [int]$BuildId, + + [string]$OutputDir = "", + + [string]$ScriptDir = "", + + [string]$Org = "https://devdiv.visualstudio.com", + [string]$Project = "DevDiv" +) + +$ErrorActionPreference = 'Stop' + +if ([string]::IsNullOrWhiteSpace($ScriptDir)) { + # shared/Aggregate-UITestArtifacts.ps1 lives in .github/scripts/shared, + # Get-TrxResults lives one level up in .github/scripts/Review-PR.ps1. + $ScriptDir = Split-Path -Parent (Split-Path -Parent $MyInvocation.MyCommand.Path) +} +$reviewScript = Join-Path $ScriptDir "Review-PR.ps1" +if (-not (Test-Path $reviewScript)) { + throw "Review-PR.ps1 not found at '$reviewScript' — needed for Get-TrxResults" +} + +if ([string]::IsNullOrWhiteSpace($OutputDir)) { + $OutputDir = Join-Path ([System.IO.Path]::GetTempPath()) "copilot-uitests-$BuildId" +} +New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + +# --------- Source Get-TrxResults --------- +$trxHelperPath = Join-Path $PSScriptRoot "Get-TrxResults.ps1" +if (Test-Path $trxHelperPath) { + . $trxHelperPath +} else { + throw "Get-TrxResults.ps1 not found at $trxHelperPath" +} + +# Map artifact name → matrix category. 
Job names look like: +# android_ui_tests_controls_30_ +# ios_ui_tests_mono_controls_latest_ +# winui_ui_tests_controls_ +# mac_ui_tests_controls_ +function Get-CategoryFromArtifactName { + param([string]$ArtifactName) + + # Pattern: drop-<stage>-<job>-<attempt> + $stagePrefixes = @( + 'android_ui_tests', 'android_ui_tests_coreclr', 'android_ui_tests_material3', + 'ios_ui_tests_mono', 'ios_ui_tests_mono_cv1', 'ios_ui_tests_mono_carv1', + 'ios_ui_tests_nativeaot', + 'winui_ui_tests', 'mac_ui_tests' + ) + + $name = $ArtifactName -replace '^drop-', '' -replace '-\d+$', '' + + foreach ($sp in $stagePrefixes | Sort-Object Length -Descending) { + if ($name -match "^${sp}-(.+)$") { + return $Matches[1].Trim() + } + } + return $name +} + +# Walk a pre-populated OutputDir: find all .trx files (one per matrix +# job's drop-* artifact) and merge by category. Pure function — no az +# calls — so it can be tested with synthetic fixtures. RootDir may be relative. +function Get-AggregatedTrxFromDirectory { + param([string]$RootDir) + + $byCategory = @{} + if (-not (Test-Path $RootDir)) { + return $byCategory + } + $trxFiles = @(Get-ChildItem -Path $RootDir -Filter "*.trx" -Recurse -ErrorAction SilentlyContinue) + Write-Host " Found $($trxFiles.Count) TRX file(s) under $RootDir" -ForegroundColor Gray + + foreach ($trx in $trxFiles) { + $trxResult = Get-TrxResults -TrxPath $trx.FullName + if (-not $trxResult) { continue } + + $relative = $trx.FullName.Substring((Resolve-Path -LiteralPath $RootDir).ProviderPath.TrimEnd('/','\').Length).TrimStart('/','\') # slice by the RESOLVED root: FullName is absolute, $RootDir may be relative or un-normalized + $artName = $relative.Split([System.IO.Path]::DirectorySeparatorChar)[0] + $category = Get-CategoryFromArtifactName -ArtifactName $artName + + if (-not $byCategory.ContainsKey($category)) { + $byCategory[$category] = @{ + Total = 0 + Passed = 0 + Failed = 0 + Skipped = 0 + Results = @() + TrxPaths = @() + ArtifactName = $artName + } + } + $cur = $byCategory[$category] + $cur.Total += [int]$trxResult.Total + $cur.Passed += [int]$trxResult.Passed + $cur.Failed += [int]$trxResult.Failed + $cur.Skipped += [int]$trxResult.Skipped +
$cur.Results = @($cur.Results) + @($trxResult.Results) + $cur.TrxPaths = @($cur.TrxPaths) + @($trx.FullName) + $byCategory[$category] = $cur + } + + return $byCategory +} + +# --------- List artifacts on the build --------- +Write-Host "Aggregate-UITestArtifacts: listing artifacts for build #$BuildId" -ForegroundColor Cyan +$artifactsRaw = az pipelines runs artifact list ` + --org $Org --project $Project --run-id $BuildId -o json 2>$null +if ($LASTEXITCODE -ne 0 -or -not $artifactsRaw) { + Write-Host " ⚠️ Failed to list artifacts; falling back to walking $OutputDir directly" -ForegroundColor Yellow + return Get-AggregatedTrxFromDirectory -RootDir $OutputDir +} +$artifacts = $artifactsRaw | ConvertFrom-Json + +# Match drop-* (one per platform job) — that's where ui-tests-steps.yml's +# PublishBuildArtifacts@1 lands. Skip CopilotLogs / BuildLogs / etc. +# Also accept legacy names like "- (attempt N)" which the +# template's PublishBuildArtifacts step uses by default. +$dropArtifacts = @($artifacts | Where-Object { + $_.name -match '^drop-' -or + $_.name -match '^ui-tests-samples' -or + $_.name -match '\(attempt \d+\)$' +}) +Write-Host " Found $($dropArtifacts.Count) drop/test artifact(s) on build #$BuildId" -ForegroundColor Gray + +if ($dropArtifacts.Count -eq 0) { + Write-Host " ⚠️ No drop-* artifacts — child build may not have reached test execution stage" -ForegroundColor Yellow + return @{} +} + +# --------- Download each artifact --------- +foreach ($art in $dropArtifacts) { + $artDir = Join-Path $OutputDir $art.name + if (Test-Path $artDir) { continue } # already downloaded + Write-Host " ⬇ $($art.name)" -ForegroundColor DarkGray + az pipelines runs artifact download ` + --org $Org --project $Project --run-id $BuildId ` + --artifact-name $art.name --path $artDir 2>&1 | Out-Null + if ($LASTEXITCODE -ne 0) { + Write-Host " ⚠ download failed for $($art.name)" -ForegroundColor Yellow + } +} + +# --------- Walk all .trx files --------- +$byCategory = 
Get-AggregatedTrxFromDirectory -RootDir $OutputDir
+
+Write-Host "Aggregate-UITestArtifacts: aggregated $($byCategory.Count) category bucket(s)" -ForegroundColor Cyan
+foreach ($k in $byCategory.Keys | Sort-Object) {
+    $b = $byCategory[$k]
+    Write-Host " ${k}: total=$($b.Total) passed=$($b.Passed) failed=$($b.Failed) skipped=$($b.Skipped) (from $($b.TrxPaths.Count) TRX file(s))" -ForegroundColor Gray
+}
+
+return $byCategory
diff --git a/.github/scripts/shared/Build-AndDeploy.ps1 b/.github/scripts/shared/Build-AndDeploy.ps1
index ae81e05a1ea8..ee490316d546 100644
--- a/.github/scripts/shared/Build-AndDeploy.ps1
+++ b/.github/scripts/shared/Build-AndDeploy.ps1
@@ -85,7 +85,7 @@ if ($Platform -eq "android") {
     Write-Info "Build command: dotnet build $($buildArgs -join ' ')"
     $buildStartTime = Get-Date
 
-    $maxAttempts = 2
+    $maxAttempts = 3
     $buildExitCode = 1
 
     for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) {
@@ -104,11 +104,31 @@ if ($Platform -eq "android") {
             # Restart ADB server to recover from broken pipe / transient errors
             Write-Info "Restarting ADB server..."
             & adb kill-server 2>$null
-            Start-Sleep -Seconds 2
+            Start-Sleep -Seconds 3
             & adb start-server
-            Start-Sleep -Seconds 2
-            & adb wait-for-device
             Start-Sleep -Seconds 3
+
+            # Wait for device and verify emulator is fully responsive
+            Write-Info "Waiting for device to be fully ready..."
+            & adb wait-for-device
+            Start-Sleep -Seconds 5
+
+            # Verify package manager is responsive before retrying build (output is joined: -notmatch on an array only filters lines and is truthy if any line lacks the error text)
+            $pmReady = $false
+            for ($pmCheck = 1; $pmCheck -le 10; $pmCheck++) {
+                $pmOutput = & adb shell pm list packages -3 2>&1
+                if ($LASTEXITCODE -eq 0 -and ($pmOutput -join "`n") -notmatch 'Broken pipe|error') {
+                    $pmReady = $true
+                    Write-Info "Package manager responsive (check $pmCheck)"
+                    break
+                }
+                Write-Warn "Package manager not ready (check $pmCheck/10), waiting..."
+                Start-Sleep -Seconds 3
+            }
+
+            if (-not $pmReady) {
+                Write-Warn "Package manager still unresponsive — attempting build anyway"
+            }
         }
 
         & dotnet build @buildArgs
diff --git a/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1 b/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1
new file mode 100644
index 000000000000..bba2c0e8dd02
--- /dev/null
+++ b/.github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1
@@ -0,0 +1,65 @@
+function Get-AggregatedTrxFromDirectory {
+    <#
+    .SYNOPSIS
+        Merges every .trx file found under RootDir into per-category totals.
+    .DESCRIPTION
+        Recursively finds *.trx files, parses each one with Get-TrxResults, and
+        buckets it by the category that Get-CategoryFromArtifactName derives
+        from the first path segment below RootDir. TRX files for which
+        Get-TrxResults returns nothing are skipped.
+    .PARAMETER RootDir
+        Directory to scan; relative paths are accepted. An empty or missing
+        path yields an empty hashtable.
+    .OUTPUTS
+        Hashtable keyed by category. Each value is a hashtable with Total,
+        Passed, Failed, Skipped, Results, TrxPaths and ArtifactName (the
+        artifact folder of the first TRX seen for that category).
+    #>
+    param([string]$RootDir)
+
+    $byCategory = @{}
+    if (-not $RootDir -or -not (Test-Path $RootDir)) {
+        return $byCategory
+    }
+
+    # Get-ChildItem reports absolute FullName values, so the prefix to strip
+    # must be measured on the resolved root. Using the raw RootDir length cut
+    # the path at the wrong offset whenever RootDir was relative.
+    $rootFull = @(Resolve-Path $RootDir)[0].ProviderPath.TrimEnd('/', '\')
+
+    $trxFiles = @(Get-ChildItem -Path $RootDir -Filter "*.trx" -Recurse -ErrorAction SilentlyContinue)
+    Write-Host " Found $($trxFiles.Count) TRX file(s) under $RootDir" -ForegroundColor Gray
+
+    foreach ($trx in $trxFiles) {
+        $trxResult = Get-TrxResults -TrxPath $trx.FullName
+        if (-not $trxResult) { continue }
+
+        # The first path segment below the root is the artifact folder name.
+        $relative = $trx.FullName.Substring($rootFull.Length).TrimStart('/','\')
+        $artName = $relative.Split([System.IO.Path]::DirectorySeparatorChar)[0]
+        $category = Get-CategoryFromArtifactName -ArtifactName $artName
+
+        if (-not $byCategory.ContainsKey($category)) {
+            $byCategory[$category] = @{
+                Total = 0
+                Passed = 0
+                Failed = 0
+                Skipped = 0
+                Results = @()
+                TrxPaths = @()
+                ArtifactName = $artName
+            }
+        }
+        $cur = $byCategory[$category]
+        $cur.Total += [int]$trxResult.Total
+        $cur.Passed += [int]$trxResult.Passed
+        $cur.Failed += [int]$trxResult.Failed
+        $cur.Skipped += [int]$trxResult.Skipped
+        # @() keeps both operands arrays so + concatenates them.
+        $cur.Results = @($cur.Results) + @($trxResult.Results)
+        $cur.TrxPaths = @($cur.TrxPaths) + @($trx.FullName)
+        $byCategory[$category] = $cur
+    }
+
+    return $byCategory
+}
diff --git a/.github/scripts/shared/Get-CategoryFromArtifactName.ps1 b/.github/scripts/shared/Get-CategoryFromArtifactName.ps1
new file mode 100644
index 000000000000..0aa90c0bc3cb
--- /dev/null
+++ b/.github/scripts/shared/Get-CategoryFromArtifactName.ps1
@@ -0,0 +1,32 @@
+function
Get-CategoryFromArtifactName {
+    param([string]$ArtifactName)
+
+    # Pattern: drop-<stage>-<category>-<attempt>
+    # Stage 2 uses: drop-<platform>_ui_tests-controls-<category>-<attempt>
+    # where platform is the literal CI parameter (android, ios, catalyst, windows).
+    # Legacy CI stages use different names (ios_ui_tests_mono, winui_ui_tests, etc.).
+    $stagePrefixes = @(
+        # Stage 2 literal platform naming (from ci-copilot.yml)
+        'android_ui_tests-controls', 'ios_ui_tests-controls',
+        'catalyst_ui_tests-controls', 'windows_ui_tests-controls',
+        # Legacy CI stage naming with controls infix
+        'android_ui_tests_coreclr-controls', 'android_ui_tests_material3-controls',
+        'ios_ui_tests_mono-controls', 'ios_ui_tests_mono_cv1-controls', 'ios_ui_tests_mono_carv1-controls',
+        'ios_ui_tests_nativeaot-controls',
+        'winui_ui_tests-controls', 'mac_ui_tests-controls',
+        # Legacy CI stage naming (without controls infix)
+        'android_ui_tests', 'android_ui_tests_coreclr', 'android_ui_tests_material3',
+        'ios_ui_tests_mono', 'ios_ui_tests_mono_cv1', 'ios_ui_tests_mono_carv1',
+        'ios_ui_tests_nativeaot',
+        'winui_ui_tests', 'mac_ui_tests'
+    )
+
+    $name = $ArtifactName -replace '^drop-', '' -replace '-\d+$', '' # strip the "drop-" prefix and a trailing "-<digits>" suffix
+
+    foreach ($sp in $stagePrefixes | Sort-Object Length -Descending) { # longest prefix first so "-controls" variants win over their shorter base names
+        if ($name -match "^${sp}-(.+)$") {
+            return $Matches[1].Trim() # everything after "<prefix>-" is the category
+        }
+    }
+    return $name # no known stage prefix matched: fall back to the trimmed name
+}
diff --git a/.github/scripts/shared/Get-EnvErrorPatterns.ps1 b/.github/scripts/shared/Get-EnvErrorPatterns.ps1
new file mode 100644
index 000000000000..36d18ff76bed
--- /dev/null
+++ b/.github/scripts/shared/Get-EnvErrorPatterns.ps1
@@ -0,0 +1,27 @@
+function Get-EnvErrorPatterns {
+    <#
+    .SYNOPSIS
+        Single source of truth for environment-error patterns that trigger retry.
+    .DESCRIPTION
+        Returns an array of regex patterns that identify transient environment
+        errors (as opposed to real test failures). Used by Invoke-UITestWithRetry,
+        Review-PR.ps1 STEP 3, and the Gate (verify-tests-fail.ps1) to make
+        identical retry decisions.
+    #>
+    return @(
+        'error ADB0010.*InstallFailedException',
+        'InstallFailedException',
+        'Failure calling service package',
+        'Broken pipe',
+        'XHarness exit code:\s*83',
+        'Application test run crashed',
+        'SIGABRT.*load_aot_module',
+        'AppiumServerHasNotBeenStartedLocally',
+        'no such element.*could not be located',
+        'no devices/emulators found',
+        'device offline',
+        'Could not connect to device',
+        'Failed to launch the application',
+        'cmd: Failure'
+    )
+}
diff --git a/.github/scripts/shared/Get-TrxResults.ps1 b/.github/scripts/shared/Get-TrxResults.ps1
new file mode 100644
index 000000000000..974c5ee15974
--- /dev/null
+++ b/.github/scripts/shared/Get-TrxResults.ps1
@@ -0,0 +1,78 @@
+function Get-TrxResults {
+    param([string]$TrxPath)
+
+    if (-not $TrxPath -or -not (Test-Path $TrxPath)) {
+        return $null
+    }
+
+    try {
+        [xml]$trx = Get-Content -Path $TrxPath -Raw -Encoding UTF8
+    } catch {
+        Write-Host " ⚠️ Failed to parse TRX $TrxPath : $_" -ForegroundColor Yellow
+        return $null
+    }
+
+    # The TRX is in the VSTest namespace. Set up an XmlNamespaceManager so we
+    # can address nodes regardless of prefix.
+    $ns = New-Object System.Xml.XmlNamespaceManager($trx.NameTable)
+    $ns.AddNamespace('t', 'http://microsoft.com/schemas/VisualStudio/TeamTest/2010')
+
+    # Counters live on the ResultSummary/Counters element; all counts stay 0 when it is absent.
+    $countersNode = $trx.SelectSingleNode('//t:ResultSummary/t:Counters', $ns)
+    $total = 0; $passed = 0; $failed = 0; $skipped = 0
+    if ($countersNode) {
+        $total = [int]($countersNode.GetAttribute('total'))
+        $passed = [int]($countersNode.GetAttribute('passed'))
+        $failed = [int]($countersNode.GetAttribute('failed'))
+        # Skipped is derived as "total - executed" (clamped at 0); no separate skipped attribute is read.
+        $executed = [int]($countersNode.GetAttribute('executed'))
+        $skipped = [Math]::Max(0, $total - $executed)
+    }
+
+    $entries = New-Object System.Collections.ArrayList
+    $resultNodes = $trx.SelectNodes('//t:UnitTestResult', $ns)
+    foreach ($r in $resultNodes) {
+        $rawName = $r.GetAttribute('testName')
+        # Use the raw test name as-is from TRX.
+        $name = $rawName
+
+        $outcomeAttr = $r.GetAttribute('outcome')
+        $status = switch ($outcomeAttr) {
+            'Passed' { 'Passed' }
+            'Failed' { 'Failed' }
+            'NotExecuted' { 'Skipped' }
+            'Inconclusive' { 'Skipped' }
+            # Map all other outcomes (Aborted, Timeout, Error, Disconnected,
+            # Warning, Pending) to Failed so they appear in failure disclosures
+            # and match the TRX Counters/failed count.
+            default { 'Failed' }
+        }
+        $duration = $r.GetAttribute('duration')
+
+        $err = ''; $stack = ''
+        $errInfo = $r.SelectSingleNode('t:Output/t:ErrorInfo', $ns)
+        if ($errInfo) {
+            $msgNode = $errInfo.SelectSingleNode('t:Message', $ns)
+            $stackNode = $errInfo.SelectSingleNode('t:StackTrace', $ns)
+            if ($msgNode) { $err = $msgNode.InnerText.Trim() }
+            if ($stackNode) { $stack = $stackNode.InnerText.Trim() }
+        }
+
+        [void]$entries.Add([ordered]@{
+            status = $status
+            name = $name
+            duration = $duration
+            error = $err
+            stack = $stack
+        })
+    }
+
+    return @{
+        Total = $total
+        Passed = $passed
+        Failed = $failed
+        Skipped = $skipped
+        Results = @($entries.ToArray())
+        TrxPath = $TrxPath
+    }
+}
diff --git a/.github/scripts/shared/Invoke-UITestWithRetry.ps1 b/.github/scripts/shared/Invoke-UITestWithRetry.ps1
new file mode 100644
index 000000000000..9a0f0bd2f32a
--- /dev/null
+++ b/.github/scripts/shared/Invoke-UITestWithRetry.ps1
@@ -0,0 +1,248 @@
+#!/usr/bin/env pwsh
+<#
+.SYNOPSIS
+    Single source of truth for "build + deploy + run UI tests" with the same
+    deploy/retry/recovery technique the Gate (verify-tests-fail.ps1) uses.
+
+.DESCRIPTION
+    Both the Gate (Phase 5) and STEP 3 (UI Test Execution Results) need to:
+      1.
Pre-boot a single shared device/simulator (Start-Emulator.ps1)
+      2. Invoke BuildAndRunHostApp.ps1 with the booted UDID so it doesn't
+         try to start its own device or race with another booted one
+      3. Detect environment errors in the captured output (ADB broken pipe,
+         XHarness exit 83, AOT loader crash, missing devices, etc.)
+      4. Retry up to N times with a backoff sleep, rebooting the device on
+         Android/iOS app-launch failures
+      5. Return both the captured stdout (for downstream parsing) and the
+         exit code, plus a flag indicating whether the persistent failure
+         was an environment problem vs a real test failure.
+
+    Until this script existed, STEP 3 just called BuildAndRunHostApp.ps1
+    once with no preflight or retry, so a single ADB "Broken pipe" install
+    failure would cause every NUnit test in the fixture to OneTimeSetUp-
+    timeout and the AI summary would falsely report 100+ regressions.
+
+    The Gate's verify-tests-fail.ps1 will be updated to delegate UI test
+    runs to this script in a follow-up — for now it inlines the same logic
+    in Invoke-TestRun + Invoke-TestRunWithRetry. The patterns and behaviour
+    here are kept intentionally identical to those functions so consumers
+    behave identically across both paths.
+
+.PARAMETER Platform
+    Target platform: android | ios | maccatalyst | catalyst | windows
+
+.PARAMETER Category
+    Optional category name to pass to BuildAndRunHostApp.ps1 -Category
+
+.PARAMETER TestFilter
+    Optional NUnit/xUnit filter to pass to BuildAndRunHostApp.ps1 -TestFilter
+
+.PARAMETER MaxAttempts
+    Maximum retry attempts on environment errors (default: 3)
+
+.PARAMETER RetryDelaySeconds
+    Sleep between retries (default: 30)
+
+.PARAMETER DeviceUdid
+    Optional pre-booted device UDID. When omitted, this script boots one via
+    Start-Emulator.ps1 (Android/iOS only).
+
+.PARAMETER LogFile
+    Optional path to capture full stdout for downstream parsing.
+
+.OUTPUTS
+    Hashtable:
+      Output        : raw output array, normalized so each element is one actual
+                      line (multi-line ErrorRecords are split for downstream parsers)
+      ExitCode      : final attempt's $LASTEXITCODE
+      Attempts      : number of attempts made
+      EnvErrorHit   : last env-error pattern matched (or $null if none)
+      DeviceUdid    : the device UDID used (caller may want to share/reset)
+      TrxResultFile : path from the last ">>> TRX_RESULT_FILE:" marker line that exists on disk (or $null)
+#>
+
+[CmdletBinding()]
+param(
+    [Parameter(Mandatory=$true)] [string] $Platform,
+    [string] $Category,
+    [string] $TestFilter,
+    [int] $MaxAttempts = 3,
+    [int] $RetryDelaySeconds = 30,
+    [string] $DeviceUdid,
+    [string] $LogFile,
+    [string] $RepoRoot
+)
+
+$ErrorActionPreference = 'Continue'
+
+if (-not $RepoRoot) {
+    $RepoRoot = git rev-parse --show-toplevel 2>$null
+    if (-not $RepoRoot) { $RepoRoot = (Get-Location).Path }
+}
+
+# Load shared env-error patterns (single source of truth).
+$sharedPatternsScript = Join-Path $PSScriptRoot "Get-EnvErrorPatterns.ps1"
+if (-not (Test-Path $sharedPatternsScript)) {
+    throw "Get-EnvErrorPatterns.ps1 not found at $sharedPatternsScript — env-error retry requires the shared pattern file."
+}
+. $sharedPatternsScript
+$envErrorPatterns = Get-EnvErrorPatterns
+
+# ── Step 1: pre-boot the device once (same as Gate's Invoke-TestRun) ──────
+$bootedUdid = $DeviceUdid
+$emulatorPlatform = switch ($Platform) {
+    'catalyst' { $null }
+    'maccatalyst' { $null }
+    'windows' { $null }
+    default { $Platform }
+}
+
+if ($emulatorPlatform -and -not $bootedUdid) {
+    Write-Host "🔹 Booting $Platform device/simulator (Start-Emulator.ps1)..." -ForegroundColor Cyan
+    $startEmu = Join-Path $RepoRoot ".github/scripts/shared/Start-Emulator.ps1"
+    if (Test-Path $startEmu) {
+        try {
+            $bootedUdid = & $startEmu -Platform $emulatorPlatform
+            if ($LASTEXITCODE -eq 0 -and $bootedUdid) {
+                Write-Host "✅ Device ready: $bootedUdid" -ForegroundColor Green
+            } else {
+                Write-Host "⚠️ Start-Emulator.ps1 returned exit $LASTEXITCODE; falling back to BuildAndRunHostApp internal device boot" -ForegroundColor Yellow
+                $bootedUdid = $null
+            }
+        } catch {
+            Write-Host "⚠️ Start-Emulator.ps1 threw: $_" -ForegroundColor Yellow
+            $bootedUdid = $null
+        }
+    } else {
+        Write-Host "⚠️ Start-Emulator.ps1 not found — letting BuildAndRunHostApp.ps1 boot its own device" -ForegroundColor Yellow
+    }
+}
+
+# ── Step 2: build the BuildAndRunHostApp parameter set ────────────────────
+$buildScript = Join-Path $RepoRoot ".github/scripts/BuildAndRunHostApp.ps1"
+if (-not (Test-Path $buildScript)) {
+    throw "BuildAndRunHostApp.ps1 not found at: $buildScript"
+}
+
+$baseParams = @{ Platform = $Platform }
+if ($Category) { $baseParams.Category = $Category }
+if ($TestFilter) { $baseParams.TestFilter = $TestFilter }
+if ($bootedUdid) { $baseParams.DeviceUdid = $bootedUdid }
+
+# ── Step 3: retry loop on environment errors (same as Gate's
+# Invoke-TestRunWithRetry, including device reboot between attempts) ───
+$attempts = 0
+$lastOutput = @()
+$lastExit = -1
+$envHit = $null
+
+for ($attempt = 1; $attempt -le $MaxAttempts; $attempt++) {
+    $attempts = $attempt
+    if ($attempt -gt 1) {
+        Write-Host "↻ Attempt $attempt/$MaxAttempts after environment error '$envHit'" -ForegroundColor Yellow
+
+        # Same recovery as Gate's Invoke-TestRunWithRetry
+        if ($Platform -eq 'android') {
+            try {
+                Write-Host "🔄 adb reboot to recover" -ForegroundColor Yellow
+                if ($bootedUdid) {
+                    & adb -s $bootedUdid reboot 2>$null | Out-Null
+                    & adb -s $bootedUdid wait-for-device 2>$null | Out-Null
+                } else {
+                    & adb reboot 2>$null | Out-Null
+                    & adb wait-for-device 2>$null | Out-Null
+                }
+            } catch {
+                Write-Host "(adb reboot failed: $_)" -ForegroundColor DarkGray
+            }
+        } elseif ($Platform -in @('ios','catalyst','maccatalyst')) { # NOTE(review): catalyst/maccatalyst also take this simulator path although Step 1 boots no device for them — confirm intended
+            $sim = $bootedUdid
+            if (-not $sim) {
+                try {
+                    $boot = & xcrun simctl list devices booted 2>$null | Select-String -Pattern '\(([0-9A-F-]{36})\)' | Select-Object -First 1
+                    if ($boot) { $sim = $boot.Matches.Groups[1].Value }
+                } catch { }
+            }
+            if ($sim) {
+                try {
+                    Write-Host "🔄 simctl shutdown/boot $sim" -ForegroundColor Yellow
+                    & xcrun simctl shutdown $sim 2>$null | Out-Null
+                    Start-Sleep -Seconds 5
+                    & xcrun simctl boot $sim 2>$null | Out-Null
+                } catch {
+                    Write-Host "(simctl reboot failed: $_)" -ForegroundColor DarkGray
+                }
+            }
+        }
+        Start-Sleep -Seconds $RetryDelaySeconds
+    }
+
+    $envHit = $null
+    Write-Host "▶ BuildAndRunHostApp.ps1 attempt $attempt/$MaxAttempts" -ForegroundColor Cyan
+    $lastOutput = & $buildScript @baseParams 2>&1
+    $lastExit = $LASTEXITCODE
+
+    if ($lastExit -eq 0) { break }
+
+    # Same env-error scan as Get-TestResultFromOutput in the Gate.
+    $joined = ($lastOutput | ForEach-Object { "$_" }) -join "`n"
+    foreach ($p in $envErrorPatterns) {
+        if ($joined -match $p) { $envHit = $p; break }
+    }
+    if (-not $envHit) { break } # real test failure — no point retrying
+    if ($attempt -eq $MaxAttempts) {
+        Write-Host "⚠️ Env error '$envHit' persisted after $MaxAttempts attempts" -ForegroundColor Yellow
+    }
+}
+
+# ── Normalize the captured output: PowerShell's `& cmd 2>&1` wraps multi-line
+# stderr blocks as single ErrorRecord/string elements with embedded \n.
+# The downstream Get-DotNetTestResults regex is anchored ^...$ (start/end
+# of STRING), so without splitting, a multi-line element gets misparsed
+# and a 100+-test fixture can collapse into one bogus result with all
+# names concatenated. We split each element here so every consumer sees
+# one true line per array element. ──
+$normalized = @(
+    $lastOutput | ForEach-Object {
+        $s = "$_"
+        if ($s.Contains("`n") -or $s.Contains("`r")) {
+            $s -split "`r`n|`n|`r"
+        } else {
+            $s
+        }
+    }
+)
+
+if ($LogFile) {
+    try {
+        $dir = Split-Path -Parent $LogFile
+        if ($dir -and -not (Test-Path $dir)) { New-Item -ItemType Directory -Path $dir -Force | Out-Null }
+        $normalized | Out-File -FilePath $LogFile -Encoding utf8
+    } catch {
+        Write-Host "⚠️ Failed to write $LogFile : $_" -ForegroundColor Yellow
+    }
+}
+
+# ── Surface the TRX path so STEP 3 can parse authoritative test results ──
+# BuildAndRunHostApp.ps1 prints a marker line `>>> TRX_RESULT_FILE: <path>`
+# (matching the format `RunTestWithLocalDotNet` would have produced via
+# Cake). Pull it out here so callers don't have to re-scan the output.
+$trxResultFile = $null
+foreach ($line in $normalized) {
+    $s = "$line"
+    if ($s -match '^\s*>>>\s*TRX_RESULT_FILE:\s*(.+?)\s*$') {
+        $candidate = $matches[1].Trim()
+        if (Test-Path $candidate) {
+            $trxResultFile = $candidate # no break: the last marker whose path exists wins
+        }
+    }
+}
+
+return @{
+    Output = $normalized
+    ExitCode = $lastExit
+    Attempts = $attempts
+    EnvErrorHit = $envHit
+    DeviceUdid = $bootedUdid
+    TrxResultFile = $trxResultFile
+}
diff --git a/.github/scripts/shared/Start-Emulator.ps1 b/.github/scripts/shared/Start-Emulator.ps1
index 33f9c8687a54..83f62f356989 100644
--- a/.github/scripts/shared/Start-Emulator.ps1
+++ b/.github/scripts/shared/Start-Emulator.ps1
@@ -363,16 +363,19 @@ if ($Platform -eq "android") {
     Write-Info "Auto-detecting iOS simulator..."
     $simList = xcrun simctl list devices available --json | ConvertFrom-Json
 
-    # Preferred iOS versions in order (stable preferred, beta fallback)
-    $preferredVersions = @("iOS-18", "iOS-17", "iOS-26")
+    # Preferred iOS versions in order — match main CI ui-tests pipeline (defaultiOSVersion: '26.0')
+    # iOS 26 snapshots live in src/Controls/tests/TestCases.iOS.Tests/snapshots/ios-26
+    # and UITest.cs selects ios-26 environment when platformVersion starts with "26."
+ $preferredVersions = @("iOS-26", "iOS-18", "iOS-17") # Preferred devices per iOS version to match CI configuration: - # iOS 18.x → iPhone Xs (matches CI default in UITest.cs) - # iOS 26.x → iPhone 11 Pro (matches CI visual test requirement) + # iOS 26.x → iPhone Xs / iPhone 16 Pro (snapshots in /ios-26 baseline are device-agnostic per UITest.cs:367) + # iOS 18.x → iPhone Xs (matches /ios baseline default) # iOS 17.x → iPhone Xs (fallback) $preferredDevicesPerVersion = @{ + # iPhone 11 Pro first for iOS-26: baselines captured at 1124x1126 resolution + "iOS-26" = @("iPhone 11 Pro", "iPhone Xs", "iPhone 16 Pro", "iPhone 15 Pro") "iOS-18" = @("iPhone Xs", "iPhone 16 Pro", "iPhone 15 Pro", "iPhone 14 Pro") "iOS-17" = @("iPhone Xs", "iPhone 15 Pro", "iPhone 14 Pro") - "iOS-26" = @("iPhone 11 Pro", "iPhone 16 Pro", "iPhone 15 Pro") } $selectedDevice = $null @@ -382,8 +385,11 @@ if ($Platform -eq "android") { foreach ($version in $preferredVersions) { if ($selectedDevice) { break } - # Get all runtimes matching this version prefix, sorted by version descending - # so the latest minor version is preferred (e.g., iOS-18-5 before iOS-18-3) + # Get all runtimes matching this version prefix. + # Sort descending so the HIGHEST minor version wins (e.g. iOS-26-4 + # over iOS-26-0). AcesShared agents ship iOS 26.4 pre-installed and + # PR #35061 resaved ios-26 baselines for 26.4 — using an older + # runtime (26.0) causes pixel-diff failures on every visual test. $matchingRuntimes = $simList.devices.PSObject.Properties | Where-Object { $_.Name -match $version } | Sort-Object { $_.Name } -Descending @@ -411,7 +417,56 @@ if ($Platform -eq "android") { } } - # If no preferred device found, take first available iPhone + # If no preferred device found, attempt to CREATE the right-size + # device for visual snapshot tests instead of falling back to a + # random iPhone (which would have wrong screen dimensions and + # cause every visual test to fail with "size differs"). 
+ # + # Resolution mapping (must match snapshots// baselines): + # iOS-26 baselines: 1124x1126 → iPhone 11 Pro / iPhone Xs (1125x2436 device) + # iOS-18 baselines: matches iPhone Xs default + # iOS-17 baselines: matches iPhone Xs + if (-not $selectedDevice) { + $createDevice = $null + $createDeviceTypeId = $null + if ($version -eq "iOS-26") { + $createDevice = "iPhone 11 Pro" + $createDeviceTypeId = "com.apple.CoreSimulator.SimDeviceType.iPhone-11-Pro" + } + elseif ($version -eq "iOS-18" -or $version -eq "iOS-17") { + $createDevice = "iPhone Xs" + $createDeviceTypeId = "com.apple.CoreSimulator.SimDeviceType.iPhone-Xs" + } + + if ($createDevice -and $matchingRuntimes) { + $createRuntime = $matchingRuntimes[0].Name + Write-Info "No preferred device pre-installed for $version; creating $createDevice on $createRuntime to match snapshot baselines..." + $createOutput = & xcrun simctl create $createDevice $createDeviceTypeId $createRuntime 2>&1 + if ($LASTEXITCODE -eq 0 -and $createOutput -match '^[0-9A-F-]{36}$') { + $newUdid = $createOutput.Trim() + Write-Info "Created $createDevice : $newUdid" + # Re-query so we have the full device object + $simList = xcrun simctl list devices available --json | ConvertFrom-Json + $found = $null + foreach ($rtProp in $simList.devices.PSObject.Properties) { + if ($rtProp.Name -eq $createRuntime) { + $found = $rtProp.Value | Where-Object { $_.udid -eq $newUdid } | Select-Object -First 1 + if ($found) { + $selectedDevice = $found + $selectedVersion = $rtProp.Name + break + } + } + } + } + else { + Write-Info "Failed to create $createDevice on $createRuntime`: $createOutput" + } + } + } + + # Last-resort: take first available iPhone (visual tests will likely + # report 'size differs' but at least non-visual tests can run) if (-not $selectedDevice) { $anyiPhone = $null $iphoneRuntime = $null @@ -427,7 +482,7 @@ if ($Platform -eq "android") { if ($anyiPhone) { $selectedDevice = $anyiPhone $selectedVersion = $iphoneRuntime - Write-Info 
"Using available iPhone: $($anyiPhone.name) on $selectedVersion" + Write-Info "Using available iPhone (resolution may not match snapshot baselines): $($anyiPhone.name) on $selectedVersion" } } } @@ -511,5 +566,8 @@ if ($Platform -eq "android") { $env:DEVICE_UDID = $DeviceUdid Write-Success "DEVICE_UDID environment variable set: $DeviceUdid" +# Ensure clean exit code (adb commands above may leave $LASTEXITCODE non-zero) +$global:LASTEXITCODE = 0 + # Return UDID for callers return $DeviceUdid diff --git a/.github/scripts/tests/Test-FindRegressionRisks.ps1 b/.github/scripts/tests/Test-FindRegressionRisks.ps1 new file mode 100644 index 000000000000..ddc323551171 --- /dev/null +++ b/.github/scripts/tests/Test-FindRegressionRisks.ps1 @@ -0,0 +1,418 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Tests for Find-RegressionRisks.ps1 + +.DESCRIPTION + Validates the regression cross-reference algorithm: diff parsing, trivial-line + filtering, whitespace normalization, REVERT/OVERLAP/CLEAN classification, and + output file generation. Tests use fixture data to avoid gh/git API calls. 
+ +.EXAMPLE + ./Test-FindRegressionRisks.ps1 +#> + +param( + [switch]$Verbose +) + +$ErrorActionPreference = "Stop" +$RepoRoot = git rev-parse --show-toplevel +$ScriptPath = Join-Path $RepoRoot ".github/scripts/Find-RegressionRisks.ps1" + +# Test tracking +$script:TestsPassed = 0 +$script:TestsFailed = 0 +$script:TestsSkipped = 0 + +function Write-TestResult { + param( + [string]$TestName, + [bool]$Passed, + [string]$Message = "" + ) + if ($Passed) { + Write-Host " [PASS] $TestName" -ForegroundColor Green + $script:TestsPassed++ + } else { + Write-Host " [FAIL] $TestName" -ForegroundColor Red + if ($Message) { Write-Host " $Message" -ForegroundColor Yellow } + $script:TestsFailed++ + } +} + +function Write-TestSkipped { + param([string]$TestName, [string]$Reason) + Write-Host " [SKIP] $TestName - $Reason" -ForegroundColor Yellow + $script:TestsSkipped++ +} + +function Test-Section { + param([string]$Name) + Write-Host "" + Write-Host "=== $Name ===" -ForegroundColor Cyan +} + +# ============================================================ +# Load helper functions from the script via dot-source +# ============================================================ + +# We dot-source the script in a constrained way: override the param block +# by extracting just the function definitions. This avoids running Main. 
+ +Test-Section "Script Existence" +Write-TestResult "Find-RegressionRisks.ps1 exists" (Test-Path $ScriptPath) + +# Extract function definitions by parsing the script AST +Test-Section "Function Extraction" + +$ast = [System.Management.Automation.Language.Parser]::ParseFile($ScriptPath, [ref]$null, [ref]$null) +$functions = $ast.FindAll({ $args[0] -is [System.Management.Automation.Language.FunctionDefinitionAst] }, $false) + +foreach ($fn in $functions) { + # Define each function in this scope + Invoke-Expression $fn.Extent.Text +} + +$expectedFunctions = @( + 'Write-Banner', 'ConvertTo-NormalizedLine', 'Test-IsImplementationFile', + 'Get-PRDiffText', 'Get-DiffLinesByFile', 'Test-IsTrivialLine', + 'Test-IsBugFixLabel', 'Get-LinkedIssueNumbers', 'Get-PRMetadataIfBugFix' +) +foreach ($name in $expectedFunctions) { + Write-TestResult "Function '$name' extracted" ($null -ne (Get-Command $name -ErrorAction SilentlyContinue)) +} + +# ============================================================ +# Test: ConvertTo-NormalizedLine +# ============================================================ +Test-Section "ConvertTo-NormalizedLine" + +Write-TestResult "Collapses tabs to single space" ( + (ConvertTo-NormalizedLine "`t`tint x = 1;") -eq "int x = 1;" +) +Write-TestResult "Collapses multiple spaces" ( + (ConvertTo-NormalizedLine " int x = 1; ") -eq "int x = 1;" +) +Write-TestResult "Trims leading/trailing whitespace" ( + (ConvertTo-NormalizedLine " hello ") -eq "hello" +) +Write-TestResult "Empty string stays empty" ( + (ConvertTo-NormalizedLine "") -eq "" +) + +# ============================================================ +# Test: Test-IsImplementationFile +# ============================================================ +Test-Section "Test-IsImplementationFile" + +Write-TestResult "Accepts .cs file" (Test-IsImplementationFile "src/Controls/src/Core/Button.cs") +Write-TestResult "Accepts .xaml file" (Test-IsImplementationFile "src/Controls/src/Core/Views/Button.xaml") 
+Write-TestResult "Rejects .csproj" (-not (Test-IsImplementationFile "src/Controls/src/Core/Controls.csproj")) +Write-TestResult "Rejects test file" (-not (Test-IsImplementationFile "src/Controls/tests/UnitTests/ButtonTests.cs")) +Write-TestResult "Rejects TestCases file" (-not (Test-IsImplementationFile "src/Controls/tests/TestCases.HostApp/Issue123.cs")) +Write-TestResult "Rejects .Designer.cs" (-not (Test-IsImplementationFile "src/Resources.Designer.cs")) +Write-TestResult "Rejects .g.cs" (-not (Test-IsImplementationFile "src/Generated.g.cs")) +Write-TestResult "Rejects samples" (-not (Test-IsImplementationFile "src/Controls/samples/Sample/MainPage.cs")) + +# ============================================================ +# Test: Test-IsTrivialLine +# ============================================================ +Test-Section "Test-IsTrivialLine" + +Write-TestResult "Empty string is trivial" (Test-IsTrivialLine "") +Write-TestResult "Whitespace only is trivial" (Test-IsTrivialLine " ") +Write-TestResult "Short token is trivial" (Test-IsTrivialLine "{ }") +Write-TestResult "Brace-only is trivial" (Test-IsTrivialLine "{ } ;") +Write-TestResult "Return statement is trivial" (Test-IsTrivialLine "return;") +Write-TestResult "Break is trivial" (Test-IsTrivialLine "break;") +Write-TestResult "Using directive is trivial" (Test-IsTrivialLine "using System.Linq;") +Write-TestResult "Comment is trivial" (Test-IsTrivialLine "// This is a comment") +Write-TestResult "Actual code is NOT trivial" (-not (Test-IsTrivialLine "var handler = new ViewHandler();")) +Write-TestResult "Method call is NOT trivial" (-not (Test-IsTrivialLine "parent.SetPadding(left, top, right, bottom);")) + +# ============================================================ +# Test: Test-IsBugFixLabel +# ============================================================ +Test-Section "Test-IsBugFixLabel" + +Write-TestResult "i/regression matches" (Test-IsBugFixLabel "i/regression") +Write-TestResult "t/bug matches" 
(Test-IsBugFixLabel "t/bug") +Write-TestResult "p/0 matches" (Test-IsBugFixLabel "p/0") +Write-TestResult "p/1 matches" (Test-IsBugFixLabel "p/1") +Write-TestResult "t/enhancement does NOT match" (-not (Test-IsBugFixLabel "t/enhancement")) +Write-TestResult "area/controls does NOT match" (-not (Test-IsBugFixLabel "area/controls")) +Write-TestResult "p/2 does NOT match" (-not (Test-IsBugFixLabel "p/2")) + +# ============================================================ +# Test: Get-LinkedIssueNumbers +# ============================================================ +Test-Section "Get-LinkedIssueNumbers" + +$body1 = "Fixes #12345`nCloses #67890" +$linked1 = Get-LinkedIssueNumbers $body1 +Write-TestResult "Finds Fixes #N" ($linked1 -contains 12345) +Write-TestResult "Finds Closes #N" ($linked1 -contains 67890) + +$body2 = "Resolves https://github.com/dotnet/maui/issues/99999" +$linked2 = Get-LinkedIssueNumbers $body2 +Write-TestResult "Finds full URL" ($linked2 -contains 99999) + +$body3 = "- #111`n- #222`n- #333" +$linked3 = Get-LinkedIssueNumbers $body3 +Write-TestResult "Finds bullet list issues" ($linked3.Count -ge 3) + +$body4 = "No issues mentioned here." 
+$linked4 = Get-LinkedIssueNumbers $body4 +Write-TestResult "Empty when no issues" ($linked4.Count -eq 0) + +Write-TestResult "Handles null body" ((Get-LinkedIssueNumbers $null).Count -eq 0) + +# ============================================================ +# Test: Get-DiffLinesByFile +# ============================================================ +Test-Section "Get-DiffLinesByFile" + +$simpleDiff = @" +diff --git a/src/File.cs b/src/File.cs +index abc..def 100644 +--- a/src/File.cs ++++ b/src/File.cs +@@ -10,4 +10,4 @@ namespace Foo + context line +-removed line ++added line + context line +"@ + +$parsed = Get-DiffLinesByFile -DiffText $simpleDiff +Write-TestResult "Parses one file" ($parsed.ContainsKey("src/File.cs")) +$fileLines = $parsed["src/File.cs"] +$removed = @($fileLines | Where-Object { $_.Sign -eq '-' }) +$added = @($fileLines | Where-Object { $_.Sign -eq '+' }) +Write-TestResult "Found 1 removed line" ($removed.Count -eq 1) +Write-TestResult "Found 1 added line" ($added.Count -eq 1) +Write-TestResult "Removed text correct" ($removed[0].Text -eq "removed line") +Write-TestResult "Added text correct" ($added[0].Text -eq "added line") +Write-TestResult "Removed line number = 11" ($removed[0].Line -eq 11) +Write-TestResult "Added line number = 11" ($added[0].Line -eq 11) + +# Multi-file diff +$multiDiff = @" +diff --git a/src/A.cs b/src/A.cs +--- a/src/A.cs ++++ b/src/A.cs +@@ -1,3 +1,3 @@ + keep +-old A ++new A + keep +diff --git a/src/B.cs b/src/B.cs +--- a/src/B.cs ++++ b/src/B.cs +@@ -5,2 +5,3 @@ + keep ++added to B + keep +"@ + +$parsedMulti = Get-DiffLinesByFile -DiffText $multiDiff +Write-TestResult "Parses two files" ($parsedMulti.Count -eq 2) +Write-TestResult "Has src/A.cs" ($parsedMulti.ContainsKey("src/A.cs")) +Write-TestResult "Has src/B.cs" ($parsedMulti.ContainsKey("src/B.cs")) + +# Handles "\ No newline at end of file" marker +$noNewlineDiff = @" +diff --git a/src/C.cs b/src/C.cs +--- a/src/C.cs ++++ b/src/C.cs +@@ -1,2 +1,2 @@ + keep +-old 
line +\ No newline at end of file ++new line +\ No newline at end of file +"@ + +$parsedNoNl = Get-DiffLinesByFile -DiffText $noNewlineDiff +$cLines = $parsedNoNl["src/C.cs"] +Write-TestResult "No-newline marker ignored (2 entries)" (@($cLines).Count -eq 2) + +# CRLF handling +$crlfDiff = "diff --git a/src/D.cs b/src/D.cs`r`n--- a/src/D.cs`r`n+++ b/src/D.cs`r`n@@ -1,2 +1,2 @@`r`n keep`r`n-old`r`n+new`r`n" +$parsedCrlf = Get-DiffLinesByFile -DiffText $crlfDiff +Write-TestResult "CRLF diff parsed correctly" ($parsedCrlf.ContainsKey("src/D.cs")) + +# ============================================================ +# Test: REVERT detection logic (simulated) +# ============================================================ +Test-Section "REVERT Detection Logic" + +# Simulate: PR removes a line that was added by a fix PR +$prDiff = @" +diff --git a/src/Handler.cs b/src/Handler.cs +--- a/src/Handler.cs ++++ b/src/Handler.cs +@@ -10,4 +10,3 @@ class Handler + keep +-parent.SetPadding(left, top, right, bottom); + keep + keep +"@ + +$fixDiff = @" +diff --git a/src/Handler.cs b/src/Handler.cs +--- a/src/Handler.cs ++++ b/src/Handler.cs +@@ -10,3 +10,4 @@ class Handler + keep ++parent.SetPadding(left, top, right, bottom); + keep + keep +"@ + +$prByFile = Get-DiffLinesByFile -DiffText $prDiff +$fixByFile = Get-DiffLinesByFile -DiffText $fixDiff + +$prRemoved = @($prByFile["src/Handler.cs"] | Where-Object { + $_.Sign -eq '-' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) +}) +$fixAdded = @($fixByFile["src/Handler.cs"] | Where-Object { + $_.Sign -eq '+' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) +} | ForEach-Object { ConvertTo-NormalizedLine $_.Text }) | Select-Object -Unique + +$addedSet = New-Object 'System.Collections.Generic.HashSet[string]' +foreach ($n in $fixAdded) { [void]$addedSet.Add($n) } + +$reverted = New-Object System.Collections.Generic.List[object] +foreach ($r in $prRemoved) { + $key = ConvertTo-NormalizedLine $r.Text + if 
($addedSet.Contains($key)) { + $reverted.Add([PSCustomObject]@{ Text = $r.Text; Line = $r.Line }) + } +} + +Write-TestResult "Detects REVERT (1 reverted line)" ($reverted.Count -eq 1) +Write-TestResult "Reverted line text correct" ($reverted[0].Text -match "SetPadding") + +# ============================================================ +# Test: Whitespace-insensitive matching +# ============================================================ +Test-Section "Whitespace-Insensitive Matching" + +$prDiffWs = @" +diff --git a/src/Handler.cs b/src/Handler.cs +--- a/src/Handler.cs ++++ b/src/Handler.cs +@@ -10,4 +10,3 @@ class Handler + keep +- parent.SetPadding(left, top, right, bottom); + keep + keep +"@ + +$fixDiffWs = @" +diff --git a/src/Handler.cs b/src/Handler.cs +--- a/src/Handler.cs ++++ b/src/Handler.cs +@@ -10,3 +10,4 @@ class Handler + keep ++ parent.SetPadding(left, top, right, bottom); + keep + keep +"@ + +$prByFileWs = Get-DiffLinesByFile -DiffText $prDiffWs +$fixByFileWs = Get-DiffLinesByFile -DiffText $fixDiffWs + +$prRemovedWs = @($prByFileWs["src/Handler.cs"] | Where-Object { + $_.Sign -eq '-' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) +}) +$fixAddedWs = @($fixByFileWs["src/Handler.cs"] | Where-Object { + $_.Sign -eq '+' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) +} | ForEach-Object { ConvertTo-NormalizedLine $_.Text }) | Select-Object -Unique + +$addedSetWs = New-Object 'System.Collections.Generic.HashSet[string]' +foreach ($n in $fixAddedWs) { [void]$addedSetWs.Add($n) } + +$revertedWs = @() +foreach ($r in $prRemovedWs) { + $key = ConvertTo-NormalizedLine $r.Text + if ($addedSetWs.Contains($key)) { $revertedWs += $r } +} +Write-TestResult "Whitespace-different lines still match" ($revertedWs.Count -eq 1) + +# ============================================================ +# Test: Move-within-PR suppression +# ============================================================ +Test-Section "Move-Within-PR Suppression" 
+ +# PR removes a line AND re-adds it (refactor/move) — should NOT be flagged as REVERT +$prDiffMove = @" +diff --git a/src/Handler.cs b/src/Handler.cs +--- a/src/Handler.cs ++++ b/src/Handler.cs +@@ -10,4 +10,4 @@ class Handler + keep +-parent.SetPadding(left, top, right, bottom); + keep ++parent.SetPadding(left, top, right, bottom); +"@ + +$prByFileMove = Get-DiffLinesByFile -DiffText $prDiffMove +$prRemovedMove = @($prByFileMove["src/Handler.cs"] | Where-Object { + $_.Sign -eq '-' -and -not (Test-IsTrivialLine (ConvertTo-NormalizedLine $_.Text)) +}) +$prAddedNormMove = New-Object 'System.Collections.Generic.HashSet[string]' +foreach ($a in ($prByFileMove["src/Handler.cs"] | Where-Object { $_.Sign -eq '+' })) { + [void]$prAddedNormMove.Add((ConvertTo-NormalizedLine $a.Text)) +} + +$revertedMove = @() +foreach ($r in $prRemovedMove) { + $key = ConvertTo-NormalizedLine $r.Text + if (-not $addedSet.Contains($key)) { continue } # not in fix PR + if ($prAddedNormMove.Contains($key)) { continue } # moved within PR + $revertedMove += $r +} +Write-TestResult "Move-within-PR not flagged as REVERT" ($revertedMove.Count -eq 0) + +# ============================================================ +# Test: Self-PR exclusion +# ============================================================ +Test-Section "Self-PR Exclusion" + +# The git-log parsing should exclude the current PR number +$commitLog = @" +abc1234 Some change (#100) +def5678 Fix bug (#200) +ghi9012 Another fix (#100) +"@ + +$prNumber = 100 +$seen = New-Object 'System.Collections.Generic.HashSet[int]' +$recentPRs = New-Object 'System.Collections.Generic.List[int]' +foreach ($line in ($commitLog -split "`n")) { + if ($line -match '\(#(\d+)\)') { + $n = [int]$Matches[1] + if ($n -ne $prNumber -and $seen.Add($n)) { + $recentPRs.Add($n) + } + } +} +Write-TestResult "Self-PR excluded" (-not ($recentPRs -contains 100)) +Write-TestResult "Other PRs included" ($recentPRs -contains 200) +Write-TestResult "Dedup works" 
($recentPRs.Count -eq 1) + +# ============================================================ +# Summary +# ============================================================ +Write-Host "" +Write-Host "══════════════════════════════════════" -ForegroundColor Cyan +Write-Host " Results: $($script:TestsPassed) passed, $($script:TestsFailed) failed, $($script:TestsSkipped) skipped" -ForegroundColor $(if ($script:TestsFailed -gt 0) { "Red" } else { "Green" }) +Write-Host "══════════════════════════════════════" -ForegroundColor Cyan + +if ($script:TestsFailed -gt 0) { + exit 1 +} +exit 0 diff --git a/.github/skills/find-regression-risk/SKILL.md b/.github/skills/find-regression-risk/SKILL.md new file mode 100644 index 000000000000..506e1ff74634 --- /dev/null +++ b/.github/skills/find-regression-risk/SKILL.md @@ -0,0 +1,71 @@ +# find-regression-risk + +Detects potential regression risks in a PR by cross-referencing removed lines against lines added by recent labeled bug-fix PRs. + +## How It Works + +Purely mechanical — no AI/LLM. Five-step algorithm: + +1. **PR diff** — collects lines REMOVED by the PR under review. +2. **Git history** — `git log --follow --since=6mo` finds recent PRs that touched the same files. +3. **Label filter** — keeps PRs (or their linked issues) labeled `i/regression`, `t/bug`, `p/0`, or `p/1`. +4. **Fix diff** — fetches each fix PR's diff and collects lines it ADDED to the same file. +5. **Compare** — whitespace-insensitive string equality: + - 🔴 **REVERT** — removed line matches a line a fix PR added (highest risk). + - 🟡 **OVERLAP** — same file modified, but no exact line revert. + - 🟢 **CLEAN** — no bug-fix PRs touch the same files. 
+ +## Standalone Invocation + +```powershell +# Analyze a specific PR (auto-detects files) +pwsh -NoProfile -Command '& ./.github/scripts/Find-RegressionRisks.ps1 -PRNumber 33908 -OutputDir /tmp/out' + +# Analyze specific files only +pwsh -NoProfile -Command '& ./.github/scripts/Find-RegressionRisks.ps1 -PRNumber 33908 -OutputDir /tmp/out -FilePaths @("src/Core/src/Platform/Android/MauiWindowInsetListener.cs")' +``` + +## Parameters + +| Parameter | Required | Default | Description | +|-----------|----------|---------|-------------| +| `-PRNumber` | Yes | — | PR number to analyze | +| `-Repo` | No | `dotnet/maui` | Repository in `owner/name` form | +| `-FilePaths` | No | auto-detect | Implementation files to check | +| `-MonthsBack` | No | `6` | History window for git log | +| `-MaxRecentPRsPerFile` | No | `20` | Rate-limit guard per file | +| `-BaseBranch` | No | `main` | Base branch for `git log` scope | +| `-OutputDir` | No | — | Directory for output files | +| `-WriteInlineFindings` | No | off | Emit `inline-findings.json` | + +## Outputs + +When `-OutputDir` is specified: + +- **`result.txt`** — single token: `CLEAN`, `OVERLAP`, or `REVERT` +- **`risks.json`** — structured findings for downstream agents +- **`content.md`** — markdown summary for the PR comment +- **`inline-findings.json`** — (only with `-WriteInlineFindings`) inline annotations + +## Integration + +The script runs as **STEP 4** in `Review-PR.ps1` (Regression Cross-Reference, after UI test detection and before the Gate step). Its `content.md` is assembled into the AI summary comment by `post-ai-summary-comment.ps1`. 
+ +When REVERT risks are detected, the regression tests from the reverted fix PRs are executed: +- **UI tests** → `BuildAndRunHostApp.ps1 -Platform <platform> -TestFilter <filter>` +- **Device tests** → `Run-DeviceTests.ps1 -Project <project> -Platform <platform> -TestFilter <filter>` +- **Unit/XAML tests** → `dotnet test --filter <filter>` + +The expert reviewer agent (`maui-expert-reviewer.md`, dimension #6) reads `risks.json` to check for REVERT entries. + +## Known Limitations + +- **Inline findings**: The `-WriteInlineFindings` flag emits deletion-side (LEFT) annotations, but `post-inline-review.ps1` currently only posts RIGHT-side comments. LEFT-side findings are silently dropped. This is documented as future work. +- **Whitespace-only changes**: By design, an indent-only change to a fix line won't trigger a REVERT (the normalization collapses whitespace). This avoids false positives from reformatting. +- **`pwsh -File` array parameters**: When invoking standalone from bash, use `pwsh -Command '& ./script.ps1 -FilePaths @(...)'` syntax. `pwsh -File` doesn't evaluate `@()` expressions. 
+ +## Tests + +```powershell +pwsh -NoProfile -File .github/scripts/tests/Test-FindRegressionRisks.ps1 +``` diff --git a/.gitignore b/.gitignore index daafde085962..2ca273187506 100644 --- a/.gitignore +++ b/.gitignore @@ -391,3 +391,4 @@ temp # Gradle build reports src/Core/AndroidNative/build/reports/ + diff --git a/eng/pipelines/ci-copilot.yml b/eng/pipelines/ci-copilot.yml index fa8baa15b0b4..0f5e04b72231 100644 --- a/eng/pipelines/ci-copilot.yml +++ b/eng/pipelines/ci-copilot.yml @@ -35,6 +35,8 @@ parameters: type: object default: name: AcesShared + demands: + - ImageOverride -equals ACES_VM_SharedPool_Tahoe - name: macPool type: object @@ -120,7 +122,7 @@ stages: skipAndroidPlatformApis: true onlyAndroidPlatformDefaultApis: true skipAndroidEmulatorImages: ${{ ne(parameters.Platform, 'android') }} - skipAndroidCreateAvds: true + skipAndroidCreateAvds: ${{ ne(parameters.Platform, 'android') }} androidEmulatorApiLevel: '30' skipSimulatorSetup: ${{ or(eq(parameters.Platform, 'android'), eq(parameters.Platform, 'windows'), eq(parameters.Platform, 'catalyst')) }} skipCertificates: true @@ -197,6 +199,15 @@ stages: fi echo "=== Starting Emulator ===" + # Kill ALL stale emulator processes from previous step retries + for STALE_PID in $(pgrep -f "qemu-system" 2>/dev/null || true); do + echo "Killing stale emulator PID $STALE_PID" + kill "$STALE_PID" 2>/dev/null || true + done + sleep 2 + for STALE_PID in $(pgrep -f "qemu-system" 2>/dev/null || true); do + kill -9 "$STALE_PID" 2>/dev/null || true + done # Kill any stale adb server and restart adb kill-server 2>/dev/null || true sleep 1 @@ -229,7 +240,12 @@ stages: echo "Waiting for emulator device (adb wait-for-device, 120s timeout)..." timeout 120 adb wait-for-device if [ $? 
-eq 0 ]; then - echo "Device detected: $(adb devices -l | grep emulator)" + # Capture device ID immediately while it's responsive + DETECTED_DEVICE=$(adb devices | grep "emulator.*device" | awk '{print $1}' | head -1) + if [ -z "$DETECTED_DEVICE" ]; then + DETECTED_DEVICE="emulator-5554" + fi + echo "Device detected: $DETECTED_DEVICE ($(adb devices -l | grep emulator || true))" break fi @@ -281,7 +297,11 @@ stages: fi done - DEVICE_ID=$(adb devices | grep "emulator.*device" | awk '{print $1}') + DEVICE_ID="${DETECTED_DEVICE:-$(adb devices | grep 'emulator.*device' | awk '{print $1}' | head -1)}" + if [ -z "$DEVICE_ID" ]; then + DEVICE_ID="emulator-5554" + echo "##[warning]Could not detect device ID, defaulting to $DEVICE_ID" + fi echo "✅ Emulator fully booted: $DEVICE_ID" # Prepare emulator for CI use — keeps device responsive during idle period @@ -387,14 +407,21 @@ stages: displayName: 'Install GitHub Copilot CLI' # Boot iOS Simulator (only for iOS platform) - # UI test baseline screenshots are captured on iPhone Xs - must use same device + # Match main CI ui-tests pipeline: defaultiOSVersion: '26.0' + # Snapshots are at src/Controls/tests/TestCases.iOS.Tests/snapshots/ios-26 + # UITest.cs picks ios-26 baseline when platformVersion starts with "26." 
- bash: | echo "=== Booting iOS Simulator ===" - # Find the latest stable iOS runtime (prefer 18.x, fallback to 17.x) + # Prefer iOS 26 (main pipeline default), fallback to 18.x then 17.x RUNTIME=$(xcrun simctl list runtimes available --json | jq -r ' - [.runtimes[] | select(.name | test("iOS 18"))] | sort_by(.version) | last | .identifier // empty + [.runtimes[] | select(.name | test("iOS 26"))] | sort_by(.version) | last | .identifier // empty ') + if [ -z "$RUNTIME" ]; then + RUNTIME=$(xcrun simctl list runtimes available --json | jq -r ' + [.runtimes[] | select(.name | test("iOS 18"))] | sort_by(.version) | last | .identifier // empty + ') + fi if [ -z "$RUNTIME" ]; then RUNTIME=$(xcrun simctl list runtimes available --json | jq -r ' [.runtimes[] | select(.name | test("iOS 17"))] | sort_by(.version) | last | .identifier // empty @@ -497,7 +524,13 @@ stages: sleep 2 adb start-server sleep 2 - timeout 60 adb wait-for-device + timeout 90 adb wait-for-device + # Wait for boot to complete after ADB reconnect + waited=0 + while [ "$(adb -s "$DEVICE_ID" shell getprop sys.boot_completed 2>/dev/null | tr -d '\r')" != "1" ]; do + sleep 5; waited=$((waited+5)) + [ $waited -ge 90 ] && { echo "##[warning]Emulator still not booted after ADB restart"; break; } + done fi # Dismiss ANR dialogs and wake screen — run twice for reliability @@ -539,12 +572,12 @@ stages: echo "✅ Emulator warmed up and responsive" displayName: 'Warm Up Android Emulator' condition: and(succeeded(), eq('${{ parameters.Platform }}', 'android')) - timeoutInMinutes: 3 + timeoutInMinutes: 6 + retryCountOnTaskFailure: 2 - bash: | echo "Running Copilot PR Reviewer Agent via Review-PR.ps1..." - echo "Reviewing PR #${PARAM_PR_NUMBER}..." - + echo "Reviewing PR #${{ parameters.PRNumber }}..." # Ensure copilot CLI is accessible to pwsh subprocess. # npm global install on Linux goes to UseNode@1 toolcache path which may not # be on PATH inside pwsh even when exported from bash. 
Create a symlink in @@ -621,27 +654,12 @@ stages: fi done - # Copy any Copilot session files + # Copy any Copilot session files (bash — works on Linux/macOS) if [ -d "$HOME/.copilot" ]; then echo "Copying Copilot session state..." cp -r "$HOME/.copilot" $(Build.ArtifactStagingDirectory)/copilot-logs/copilot-session-state || true fi - # Copy CustomAgentLogsTmp if it exists - if [ -d "CustomAgentLogsTmp" ]; then - echo "Copying CustomAgentLogsTmp..." - cp -r CustomAgentLogsTmp $(Build.ArtifactStagingDirectory)/copilot-logs/ || true - fi - - # Copy any Review_Feedback files - find . -name "Review_Feedback_*.md" -type f -exec cp {} $(Build.ArtifactStagingDirectory)/copilot-logs/ \; 2>/dev/null || true - - # Copy any .github/agent-pr-session files - if [ -d ".github/agent-pr-session" ]; then - echo "Copying agent-pr-session..." - cp -r .github/agent-pr-session $(Build.ArtifactStagingDirectory)/copilot-logs/ || true - fi - # Check for failure indicators in output if [ $COPILOT_EXIT_CODE -ne 0 ]; then echo "##vso[task.logissue type=error]Review-PR.ps1 exited with code $COPILOT_EXIT_CODE" @@ -657,6 +675,7 @@ stages: fi echo "Review output saved to $(Build.ArtifactStagingDirectory)/copilot-logs/" + name: RunReview # referenceable name so the new RunDeepUITests / UpdateAISummaryComment stages can read this step's output variables (detectedCategories, detectedPlatform) via $(stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.']) displayName: 'Run PR Reviewer Agent' env: COPILOT_GITHUB_TOKEN: $(COPILOT_TOKEN) @@ -664,6 +683,38 @@ stages: DEVICE_UDID: $(DEVICE_UDID) PARAM_PR_NUMBER: ${{ parameters.PRNumber }} COMMENTS_VIA_FILE: "true" + DEFER_COMMENT_TO_STAGE3: "true" + + # Copy review artifacts into the CopilotLogs staging dir. + # Uses pwsh (not bash) so paths resolve correctly on Windows. 
+ - pwsh: | + $logsDir = "$(Build.ArtifactStagingDirectory)/copilot-logs" + if (-not (Test-Path $logsDir)) { New-Item -ItemType Directory -Path $logsDir -Force | Out-Null } + + # CustomAgentLogsTmp (PRAgent content files for Stage 3 comment) + if (Test-Path "CustomAgentLogsTmp") { + Write-Host "Copying CustomAgentLogsTmp..." + Copy-Item -Path "CustomAgentLogsTmp" -Destination $logsDir -Recurse -Force -ErrorAction SilentlyContinue + } else { + Write-Host "##[warning]CustomAgentLogsTmp not found — Stage 3 comment may be incomplete" + } + + # agent-pr-session files + if (Test-Path ".github/agent-pr-session") { + Write-Host "Copying agent-pr-session..." + Copy-Item -Path ".github/agent-pr-session" -Destination $logsDir -Recurse -Force -ErrorAction SilentlyContinue + } + + # Review_Feedback files + Get-ChildItem -Path . -Filter "Review_Feedback_*.md" -Recurse -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item $_.FullName $logsDir -ErrorAction SilentlyContinue } + + Write-Host "Artifacts staged in $logsDir" + Get-ChildItem $logsDir -Recurse -File | Select-Object -First 20 | ForEach-Object { + Write-Host " $($_.FullName.Substring($logsDir.Length))" + } + displayName: 'Copy review artifacts to staging' + condition: succeededOrFailed() # Publish Copilot logs and session artifacts - task: PublishPipelineArtifact@1 @@ -691,3 +742,817 @@ stages: fi displayName: 'Check Copilot Result' condition: succeededOrFailed() + + # ───────────────────────────────────────────────────────────────────────────── + # STAGE: RunDeepUITests + # ───────────────────────────────────────────────────────────────────────────── + # After the Copilot review agent has detected UI test categories and posted + # an initial AI summary comment with in-process per-category results, this + # stage re-runs those same categories on a real platform-appropriate pool + # (Tahoe iOS sim / Ubuntu Android emu / Windows-2022 / macOS-14) instead of + # whatever VM the Copilot agent happened to land on. 
Each category becomes + # a sequential `BuildAndRunHostApp.ps1` invocation inside ONE job per + # platform; we can't matrix-fan-out at runtime because matrix expansion is + # compile-time in AzDO. The TRX files land in the drop-deep-uitests + # artifact for the next stage to consume. + # + # Skipped via `condition:` when: + # - ReviewPR didn't emit detectedCategories (script crashed pre-STEP 2) + # - detectedCategories == 'NONE' (no UI-relevant changes) + # + # Note: this runs AFTER ReviewPR completes, not in parallel. Parallel + # execution would require splitting STEP 2 (detection) into its own + # pre-stage; that's a follow-up. The first cut is sequential to keep the + # change small and incremental. + - stage: RunDeepUITests + displayName: 'Deep UI Tests (platform pool)' + dependsOn: ReviewPR + condition: and(in(dependencies.ReviewPR.result, 'Succeeded', 'SucceededWithIssues', 'Failed'), ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], ''), ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.detectedCategories'], 'NONE')) + jobs: + - job: RunUITests + displayName: 'Run detected UI test categories' + variables: + detectedCategories: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.detectedCategories'] ] + # Use the SAME platform-pool selection logic as the CopilotReview + # job — the deep-test agent should be the right OS for the + # requested target platform. + ${{ if eq(parameters.Platform, 'android') }}: + pool: ${{ parameters.androidPool }} + ${{ elseif eq(parameters.Platform, 'ios') }}: + pool: ${{ parameters.iosPool }} + ${{ elseif eq(parameters.Platform, 'catalyst') }}: + pool: ${{ parameters.macPool }} + ${{ elseif eq(parameters.Platform, 'windows') }}: + pool: ${{ parameters.windowsPool }} + ${{ else }}: + pool: ${{ parameters.windowsPool }} + timeoutInMinutes: 240 + steps: + - checkout: self + fetchDepth: 0 + + # Bring in .NET + workloads + tasks DLL — same prerequisites the + # CopilotReview job used. 
Reusing the install-dotnet template + # keeps the SDK version pinned to global.json. + - template: common/provision.yml + parameters: + skipXcode: ${{ or(eq(parameters.Platform, 'android'), eq(parameters.Platform, 'windows'), eq(parameters.Platform, 'catalyst')) }} + skipProvisionator: true + skipJdk: ${{ ne(parameters.Platform, 'android') }} + skipAndroidCommonSdks: ${{ ne(parameters.Platform, 'android') }} + skipAndroidPlatformApis: true + onlyAndroidPlatformDefaultApis: true + skipAndroidEmulatorImages: ${{ ne(parameters.Platform, 'android') }} + skipAndroidCreateAvds: ${{ ne(parameters.Platform, 'android') }} + androidEmulatorApiLevel: '30' + skipSimulatorSetup: ${{ or(eq(parameters.Platform, 'android'), eq(parameters.Platform, 'windows'), eq(parameters.Platform, 'catalyst')) }} + skipCertificates: true + ${{ if eq(parameters.Platform, 'catalyst') }}: + openSslArgs: '' + + # Enable KVM for Android emulator on Linux agents (matches main CI) + - ${{ if eq(parameters.Platform, 'android') }}: + - template: common/enable-kvm.yml + # Free disk space on hosted Ubuntu agents — the emulator + SDK + + # workloads + AVD need ~15 GB but hosted agents start with limited + # free space. Remove pre-installed tools we don't need. + - bash: | + echo "=== Disk before cleanup ===" + df -h / + sudo rm -rf /usr/share/dotnet /usr/local/lib/android/sdk/ndk /usr/local/share/boost /opt/ghc /usr/local/.ghcup \ + /usr/share/swift /opt/hostedtoolcache/CodeQL /opt/hostedtoolcache/go /opt/hostedtoolcache/node \ + /usr/local/lib/android/sdk/build-tools/[0-2]* /usr/local/lib/android/sdk/platforms/android-[0-2]* \ + 2>/dev/null || true + sudo apt-get clean 2>/dev/null || true + echo "=== Disk after cleanup ===" + df -h / + displayName: 'Free disk space for Android emulator' + # Boot Android emulator with proper partition size and ADB setup. 
+ # Same step as ReviewPR stage — creates AVD, reduces partition to + # 2048m (fits on hosted agents), pre-authorizes ADB keys, waits + # for full boot + package manager. The emulator stays running for + # BuildAndRunHostApp.ps1 which will find it via 'adb devices'. + - script: | + export ANDROID_SDK_ROOT="${ANDROID_SDK_ROOT:-/usr/local/lib/android/sdk}" + export PATH="$ANDROID_SDK_ROOT/platform-tools:$ANDROID_SDK_ROOT/emulator:$ANDROID_SDK_ROOT/cmdline-tools/latest/bin:$PATH" + + echo "=== Creating AVD ===" + echo "no" | avdmanager create avd -n Emulator_30 -k "system-images;android-30;google_apis_playstore;x86_64" --device "Nexus 5X" --force + AVD_CONFIG="$HOME/.android/avd/Emulator_30.avd/config.ini" + [ -f "$AVD_CONFIG" ] && sed -i 's/disk.dataPartition.size=.*/disk.dataPartition.size=2048m/' "$AVD_CONFIG" + + mkdir -p "$HOME/.android" + [ ! -f "$HOME/.android/adbkey" ] && adb keygen "$HOME/.android/adbkey" 2>/dev/null || true + ADB_KEY_PUB="$HOME/.android/adbkey.pub" + AVD_DIR="$HOME/.android/avd/Emulator_30.avd" + [ -f "$ADB_KEY_PUB" ] && [ -d "$AVD_DIR" ] && cp "$ADB_KEY_PUB" "$AVD_DIR/adbkey.pub" + + # Kill ALL stale emulator processes from previous step retries + for STALE_PID in $(pgrep -f "qemu-system" 2>/dev/null || true); do + echo "Killing stale emulator PID $STALE_PID" + kill "$STALE_PID" 2>/dev/null || true + done + sleep 2 + for STALE_PID in $(pgrep -f "qemu-system" 2>/dev/null || true); do + kill -9 "$STALE_PID" 2>/dev/null || true + done + + adb kill-server 2>/dev/null || true; sleep 1; adb start-server + nohup emulator -avd Emulator_30 -gpu swiftshader_indirect -no-window -no-snapshot -no-audio -no-boot-anim -partition-size 2048 > /tmp/emulator.log 2>&1 & + echo "Emulator PID: $!" + + echo "Waiting for device..." + timeout 120 adb wait-for-device || { echo "##[error]adb wait-for-device timed out"; tail -30 /tmp/emulator.log; exit 1; } + + echo "Waiting for boot_completed..." 
+ waited=0 + while [ "$(adb shell getprop sys.boot_completed 2>/dev/null | tr -d '\r')" != "1" ]; do + sleep 5; waited=$((waited+5)) + [ $waited -ge 300 ] && { echo "##[error]Boot timeout"; exit 1; } + [ $waited -eq 90 ] && { adb kill-server; sleep 2; adb start-server; sleep 2; } + done + + echo "Waiting for package manager..." + waited=0 + while ! adb shell pm list packages 2>/dev/null | grep -q "package:"; do + sleep 5; waited=$((waited+5)) + [ $waited -ge 120 ] && { echo "##[error]PM timeout"; exit 1; } + done + + DEVICE_ID=$(adb devices | grep "emulator.*device" | awk '{print $1}' | head -1) + if [ -z "$DEVICE_ID" ]; then + DEVICE_ID="emulator-5554" + echo "##[warning]Could not detect device ID, defaulting to $DEVICE_ID" + fi + echo "✅ Emulator booted: $DEVICE_ID" + adb -s $DEVICE_ID shell settings put global window_animation_scale 0.0 || true + adb -s $DEVICE_ID shell settings put global transition_animation_scale 0.0 || true + adb -s $DEVICE_ID shell settings put global animator_duration_scale 0.0 || true + adb -s $DEVICE_ID shell settings put system screen_off_timeout 2147483647 || true + adb -s $DEVICE_ID shell svc power stayon true || true + adb -s $DEVICE_ID shell input keyevent 82 || true + adb -s $DEVICE_ID shell am broadcast -a android.intent.action.CLOSE_SYSTEM_DIALOGS 2>/dev/null || true + echo "##vso[task.setvariable variable=DEVICE_UDID]$DEVICE_ID" + echo "##vso[task.prependpath]$ANDROID_SDK_ROOT/platform-tools" + echo "##vso[task.prependpath]$ANDROID_SDK_ROOT/emulator" + displayName: 'Create AVD and Boot Android Emulator' + retryCountOnTaskFailure: 3 + timeoutInMinutes: 15 + + # ios-26 snapshot baselines were captured on iOS 26.4 (PR #35061). + # Tahoe agents (macOS 26.4) have Xcode 26.3 which can download + # iOS 26.4 simulator. provision.yml only installs 26.0 (for build). + # Explicitly download 26.4 so visual tests match baselines exactly. 
+ - ${{ if eq(parameters.Platform, 'ios') }}: + - script: | + set -x + echo "=== Current runtimes ===" + xcrun simctl list runtimes + + echo "=== Trying to install iOS 26.4 ===" + LATEST_XCODE=$(ls -d /Applications/Xcode_26*.app 2>/dev/null | sort -V | tail -1) + if [ -n "$LATEST_XCODE" ]; then + echo "Using $LATEST_XCODE" + sudo xcode-select -s "$LATEST_XCODE/Contents/Developer" + fi + + # Attempt 1: download latest iOS platform (no version specified) + echo "--- Attempt 1: latest iOS ---" + sudo xcodebuild -downloadPlatform iOS 2>&1 || true + + # Attempt 2: with universal architecture variant + echo "--- Attempt 2: iOS 26.4 universal ---" + sudo xcodebuild -downloadPlatform iOS -architectureVariant universal -buildVersion 26.4 2>&1 || true + + # Attempt 3: exact Apple build number + echo "--- Attempt 3: build 23E244 ---" + sudo xcodebuild -downloadPlatform iOS -buildVersion 23E244 2>&1 || true + + # Restore Xcode for build step + RESTORE_XCODE=$(ls -d /Applications/Xcode_$(REQUIRED_XCODE)*.app 2>/dev/null | head -1) + [ -n "$RESTORE_XCODE" ] && sudo xcode-select -s "$RESTORE_XCODE/Contents/Developer" + + echo "=== Final runtimes ===" + xcrun simctl list runtimes + displayName: 'Install iOS 26.4 simulator' + continueOnError: true + + # Catalyst (MacCatalyst) runs directly on the Mac host — no device needed. + # Mirrors main CI ui-tests-steps.yml: disable Notification Center + # (intercepts UI interactions) and macOS text autocorrect. + - ${{ if eq(parameters.Platform, 'catalyst') }}: + - bash: | + chmod +x $(System.DefaultWorkingDirectory)/eng/scripts/disable-notification-center.sh + $(System.DefaultWorkingDirectory)/eng/scripts/disable-notification-center.sh + displayName: 'Disable Notification Center' + continueOnError: true + timeoutInMinutes: 5 + + # Disable macOS text autocorrect for iOS and Catalyst (mirrors main CI). + # Autocapitalize/spellcheck can interfere with Appium text entry tests. 
+ - ${{ if or(eq(parameters.Platform, 'ios'), eq(parameters.Platform, 'catalyst')) }}: + - task: PowerShell@2 + inputs: + targetType: 'inline' + script: | + defaults write -g NSAutomaticCapitalizationEnabled -bool false + defaults write -g NSAutomaticTextCompletionEnabled -bool false + defaults write -g NSAutomaticSpellingCorrectionEnabled -bool false + displayName: 'Disable macOS text autocorrect' + continueOnError: true + + # Windows UI tests run on the host desktop. Set screen resolution + # to 1920x1080 (AzDO hosted agents default to 1024x768) so + # controls are fully visible during Appium interactions. + - ${{ if eq(parameters.Platform, 'windows') }}: + - pwsh: | + $scriptPath = Join-Path "$(System.DefaultWorkingDirectory)" "eng" "scripts" "Set-ScreenResolution.ps1" + if (Test-Path $scriptPath) { + & $scriptPath -Width 1920 -Height 1080 + } else { + Write-Host "##[warning]Set-ScreenResolution.ps1 not found — using default resolution" + } + displayName: 'Set screen resolution (1920x1080)' + continueOnError: true + + # Install .NET workloads (same as ReviewPR stage) — without this, + # dotnet build fails with NETSDK1147 because the ios/android workloads + # are not present after provision.yml (which only installs the SDK). 
+ - pwsh: ./build.ps1 --target=dotnet --configuration="Release" --verbosity=diagnostic + displayName: 'Install .NET and workloads' + retryCountOnTaskFailure: 2 + env: + DOTNET_TOKEN: $(dotnetbuilds-internal-container-read-token) + PRIVATE_BUILD: $(PrivateBuild) + + - pwsh: echo "##vso[task.prependpath]$(DotNet.Dir)" + displayName: 'Add .NET to PATH' + + - ${{ if eq(parameters.Platform, 'android') }}: + - pwsh: | + $sdk = $env:ANDROID_SDK_ROOT + if (-not $sdk) { $sdk = $env:ANDROID_HOME } + if (-not $sdk) { $sdk = "$env:HOME/Library/Android/sdk" } + $pt = Join-Path $sdk "platform-tools" + $em = Join-Path $sdk "emulator" + Write-Host "Adding Android tools to PATH: $pt, $em" + echo "##vso[task.prependpath]$pt" + echo "##vso[task.prependpath]$em" + displayName: 'Add Android SDK tools to PATH' + + - pwsh: ./build.ps1 --target=dotnet-buildtasks --configuration="Release" --verbosity=diagnostic + displayName: 'Build MSBuild Tasks' + retryCountOnTaskFailure: 1 + env: + DOTNET_TOKEN: $(dotnetbuilds-internal-container-read-token) + PRIVATE_BUILD: $(PrivateBuild) + + # Install Node.js and Appium — required by the UITest.Appium + # AppiumServerContext to boot a local Appium server. Same setup + # the existing CopilotReview job uses (see lines 316-329 of this + # file). Without these the test process throws + # InvalidServerInstanceException("There is no installed nodes") + # at the OneTimeSetUp boundary and ALL discovered tests fail. 
+          - task: UseNode@1
+            inputs:
+              version: "24.x"
+            displayName: 'Install Node.js'
+
+          # Provision Appium via the ProvisionAppium target of the Provisioning
+          # project. appium-doctor is skipped on macOS and Linux agents.
+          - pwsh: |
+              $skipAppiumDoctor = if ($IsMacOS -or $IsLinux) { "true" } else { "false" }
+              dotnet build ./src/Provisioning/Provisioning.csproj -t:ProvisionAppium -p:SkipAppiumDoctor="$skipAppiumDoctor" -bl:"$(LogDirectory)/provision-appium.binlog"
+            displayName: 'Install Appium'
+            retryCountOnTaskFailure: 2
+            timeoutInMinutes: 10
+            env:
+              APPIUM_HOME: $(APPIUM_HOME)
+
+          - bash: |
+              set -e
+              git config user.email "copilot-ci@microsoft.com"
+              git config user.name "Copilot CI"
+              # Merge the PR head commit so we run tests against the same
+              # tree the Copilot reviewer saw. Mirror Review-PR.ps1 STEP 1
+              # logic (squash-merge, fall back to head checkout on
+              # conflict — but in the conflict case the ReviewPR stage
+              # would have already failed and we wouldn't reach here).
+              git fetch origin pull/${{ parameters.PRNumber }}/head:pr-${{ parameters.PRNumber }}
+              git checkout -b deep-uitests-pr-${{ parameters.PRNumber }}
+              git merge --squash pr-${{ parameters.PRNumber }} || {
+                echo "Squash merge had conflicts — falling back to direct head checkout"
+                # `git merge --squash` does not record MERGE_HEAD, so
+                # `git merge --abort` has nothing to abort and would leave
+                # the conflicted index behind; the checkout below would then
+                # refuse to run. Reset the index and work tree explicitly.
+                git reset --hard HEAD
+                git checkout pr-${{ parameters.PRNumber }}
+              }
+              # Best effort: records the squashed tree; tolerated if there
+              # is nothing to commit or the commit fails.
+              git commit -m "PR ${{ parameters.PRNumber }} merge for deep UI tests" --allow-empty || true
+            displayName: 'Merge PR for testing'
+
+          # Bypass the iOS/MacCatalyst SDK's strict Xcode-version check.
+          # Same patch the CopilotReview job performs (see lines ~571-580
+          # of this file). Without it, .NET 10 iOS workload (which pins
+          # to e.g. Xcode 26.0) refuses to build on agents that have
+          # Xcode 26.1.1 selected — even though the produced app runs
+          # fine on the simulator.
+          - bash: |
+              set -e
+              if [ -f Directory.Build.Override.props.in ]; then
+                cp Directory.Build.Override.props.in Directory.Build.Override.props
+              fi
+              # NOTE(review): the printf format and the sed expressions below
+              # contain no XML element text as seen here (the sed pattern is
+              # empty) — they look like they lost markup somewhere upstream
+              # of this view; confirm against the real pipeline file.
+              if [ ! -f Directory.Build.Override.props ]; then
+                printf '\n\n\n' > Directory.Build.Override.props
+              fi
+              # BSD sed (Darwin) needs an explicit empty backup suffix after -i.
+              if [[ "$(uname)" == "Linux" ]]; then
+                sed -i 's|| false\n|' Directory.Build.Override.props
+              elif [[ "$(uname)" == "Darwin" ]]; then
+                sed -i '' 's|| false\n|' Directory.Build.Override.props
+              else
+                sed -i 's|| false\n|' Directory.Build.Override.props
+              fi
+              echo "===== Directory.Build.Override.props ====="
+              cat Directory.Build.Override.props
+            displayName: 'Disable Xcode version validation'
+
+          # Runs the UI tests once per detected category (or once with no
+          # category filter in run-all mode) and collects each run's TRX and
+          # snapshot diffs under deep-uitests/drop-<platform>_ui_tests-controls-<category>.
+          # Test failures are reported as a warning, never as a step failure.
+          - pwsh: |
+              $ErrorActionPreference = 'Continue'
+              $cats = "$(detectedCategories)"
+              $platform = "${{ parameters.Platform }}"
+              Write-Host "Detected categories from ReviewPR stage: $cats"
+              Write-Host "Platform: $platform"
+
+              if ([string]::IsNullOrWhiteSpace($cats) -or $cats -eq 'NONE') {
+                Write-Host "Nothing to run — skipping"
+                exit 0
+              }
+
+              $isRunAll = ($cats -eq 'ALL')
+              if ($isRunAll) {
+                Write-Host "Run-all mode detected — running without category filter"
+                # Single-element list with empty string triggers one iteration
+                # of the loop below without passing -Category to the runner.
+                $catList = @('')
+              } else {
+                # Same per-category loop the in-process STEP 3 does, only
+                # this time on a proper platform-pool agent. Each TRX lands
+                # in its own subdir so the aggregator can split per category.
+                $catList = @($cats -split ',' | ForEach-Object { $_.Trim() } | Where-Object { $_ })
+              }
+              $outputRoot = "$(Build.ArtifactStagingDirectory)/deep-uitests"
+              New-Item -ItemType Directory -Force -Path $outputRoot | Out-Null
+
+              # Locate the shared retry wrapper so Stage 2 gets the same
+              # env-error detection, device recovery, and retry logic as Stage 1.
+              # It is invoked with the call operator (&) below, not dot-sourced;
+              # when the file is absent we fall back to BuildAndRunHostApp.ps1.
+              $retryScript = ".github/scripts/shared/Invoke-UITestWithRetry.ps1"
+              $hasRetryWrapper = Test-Path $retryScript
+
+              $hadFailure = $false
+              foreach ($cat in $catList) {
+                # Folder-safe category name; the run-all sentinel ('') maps to 'ALL'.
+                $safeCat = if ([string]::IsNullOrEmpty($cat)) { 'ALL' } else { $cat -replace '[^A-Za-z0-9_.-]', '_' }
+                $catDir = Join-Path $outputRoot "drop-${platform}_ui_tests-controls-$safeCat"
+                New-Item -ItemType Directory -Force -Path $catDir | Out-Null
+                $displayCat = if ([string]::IsNullOrEmpty($cat)) { '(all tests)' } else { $cat }
+                Write-Host "============================================================"
+                Write-Host " Running category: $displayCat (platform=$platform)"
+                Write-Host "============================================================"
+                $catLog = Join-Path $catDir "build-output.log"
+                # Diagnostic: dump exact args before invocation so any quoting
+                # issue or stray characters in the category value are visible
+                # in the log.
+                Write-Host "DEBUG: cat='$cat' (length=$($cat.Length))"
+                Write-Host "DEBUG: platform='$platform' (length=$($platform.Length))"
+                Write-Host "DEBUG: PWD='$(Get-Location)'"
+                Write-Host "DEBUG: BuildAndRunHostApp.ps1 exists: $(Test-Path '.github/scripts/BuildAndRunHostApp.ps1')"
+                try {
+                  if ($hasRetryWrapper) {
+                    # Use Invoke-UITestWithRetry for env-error retry + device recovery.
+                    # Only pass -Category when we have a specific category (not run-all).
+                    $retryParams = @{
+                      Platform = $platform
+                      RepoRoot = (Get-Location).Path
+                      LogFile = $catLog
+                    }
+                    if (-not [string]::IsNullOrEmpty($cat)) { $retryParams.Category = $cat }
+                    if ($env:DEVICE_UDID) { $retryParams.DeviceUdid = $env:DEVICE_UDID }
+                    $runResult = & $retryScript @retryParams
+                    # A missing result object is treated as a failure (-1).
+                    $exitCode = if ($runResult) { $runResult.ExitCode } else { -1 }
+                    Write-Host "Attempts: $(if ($runResult) { $runResult.Attempts } else { '?' }) · Exit: $exitCode · EnvError: $(if ($runResult) { $runResult.EnvErrorHit } else { 'N/A' })"
+
+                    # Copy the specific TRX file from the result into the category dir
+                    if ($runResult -and $runResult.TrxResultFile -and (Test-Path $runResult.TrxResultFile)) {
+                      $dest = Join-Path $catDir (Split-Path -Leaf $runResult.TrxResultFile)
+                      if (-not (Test-Path $dest)) { Copy-Item $runResult.TrxResultFile $dest -ErrorAction SilentlyContinue }
+                    }
+
+                    if ($exitCode -ne 0) {
+                      # $displayCat (not $cat) so run-all mode does not print a blank name.
+                      Write-Host "Category $displayCat exited with code $exitCode" -ForegroundColor Yellow
+                      $hadFailure = $true
+                    }
+                  } else {
+                    # Fallback: call BuildAndRunHostApp.ps1 directly
+                    $argList = @(
+                      '-NoProfile',
+                      '-File', '.github/scripts/BuildAndRunHostApp.ps1',
+                      '-Platform', $platform
+                    )
+                    if (-not [string]::IsNullOrEmpty($cat)) {
+                      $argList += @('-Category', $cat)
+                    }
+                    if ($env:DEVICE_UDID) {
+                      $argList += @('-DeviceUdid', $env:DEVICE_UDID)
+                    }
+                    Write-Host "DEBUG: invoking pwsh with args: $($argList -join ' | ')"
+                    & pwsh @argList 2>&1 | Tee-Object -FilePath $catLog | ForEach-Object { Write-Host $_ }
+                    if ($LASTEXITCODE -ne 0) {
+                      Write-Host "Category $displayCat exited with code $LASTEXITCODE" -ForegroundColor Yellow
+                      $hadFailure = $true
+                    }
+                  }
+                } catch {
+                  Write-Host "Test runner threw: $_" -ForegroundColor Red
+                  $hadFailure = $true
+                }
+                # If the retry wrapper didn't produce a TRX (or we used fallback),
+                # scan the TRX results directory for this category's TRX only.
+                # Use filename matching instead of a time-based filter to avoid
+                # picking up TRX files from other categories or missing slow runs.
+                $existingTrx = @(Get-ChildItem -Path $catDir -Filter "*.trx" -ErrorAction SilentlyContinue)
+                if ($existingTrx.Count -eq 0) {
+                  # Look for TRX by category name pattern in common output locations
+                  $trxSearchDirs = @(".", "TestResults", "src/Controls/tests/TestCases.Shared.Tests/TestResults")
+                  foreach ($searchDir in $trxSearchDirs) {
+                    if (Test-Path $searchDir) {
+                      $found = Get-ChildItem -Path $searchDir -Filter "*$safeCat*.trx" -Recurse -ErrorAction SilentlyContinue | Select-Object -First 1
+                      if ($found) {
+                        $dest = Join-Path $catDir $found.Name
+                        if (-not (Test-Path $dest)) { Copy-Item $found.FullName $dest -ErrorAction SilentlyContinue }
+                        break
+                      }
+                    }
+                  }
+                }
+
+                # Capture snapshot-diff PNGs that VisualRegressionTester writes
+                # to $BUILD_ARTIFACTSTAGINGDIRECTORY/Controls.TestCases.Shared.Tests/snapshots-diff
+                # (see ui-tests-collect-snapshot-diffs.yml for reference impl).
+                # Move them into the per-category folder so they ship in the
+                # drop-deep-uitests artifact alongside the TRX. Move (not copy)
+                # so the next category's run starts with a clean diff folder.
+                $snapDiffSrc = Join-Path "$(Build.ArtifactStagingDirectory)" "Controls.TestCases.Shared.Tests/snapshots-diff"
+                if (Test-Path $snapDiffSrc) {
+                  $snapDiffDest = Join-Path $catDir "snapshots-diff"
+                  Write-Host "Moving snapshot-diffs from $snapDiffSrc -> $snapDiffDest"
+                  Move-Item -Path $snapDiffSrc -Destination $snapDiffDest -Force -ErrorAction SilentlyContinue
+                }
+              }
+
+              if ($hadFailure) {
+                # Don't fail the stage — the AI summary comment is the
+                # deliverable; failed tests get reported there. Stage-level
+                # failure would prevent the UpdateAISummaryComment stage
+                # from running.
+                Write-Host "##vso[task.logissue type=warning]One or more deep UI test categories failed (see TRX in drop-deep-uitests artifact)"
+              }
+            displayName: 'Run deep UI tests (per-category loop)'
+            timeoutInMinutes: 220
+
+          # Re-enable Notification Center after Catalyst tests (mirrors main CI cleanup)
+          - ${{ if eq(parameters.Platform, 'catalyst') }}:
+            - bash: |
+                # Paths are quoted so a working directory containing spaces
+                # does not split the command.
+                chmod +x "$(System.DefaultWorkingDirectory)/eng/scripts/enable-notification-center.sh"
+                "$(System.DefaultWorkingDirectory)/eng/scripts/enable-notification-center.sh"
+              displayName: 'Re-enable Notification Center'
+              condition: succeededOrFailed()
+              continueOnError: true
+              timeoutInMinutes: 5
+
+          # Publish whatever the per-category loop collected, even when an
+          # earlier step failed.
+          - task: PublishPipelineArtifact@1
+            displayName: 'Publish drop-deep-uitests'
+            inputs:
+              targetPath: '$(Build.ArtifactStagingDirectory)/deep-uitests'
+              artifact: 'drop-deep-uitests'
+              publishLocation: 'pipeline'
+            condition: succeededOrFailed()
+
+  # ─────────────────────────────────────────────────────────────────────────────
+  # STAGE: UpdateAISummaryComment
+  # ─────────────────────────────────────────────────────────────────────────────
+  # Final stage. Depends on both ReviewPR (which posted the initial AI
+  # summary comment and emitted aiSummaryCommentId) and RunDeepUITests
+  # (which produced the TRX artifacts on the right pool). Downloads the
+  # artifacts, parses them via Aggregate-UITestArtifacts.ps1, and edits
+  # the existing PR comment to replace the in-process STEP 3 section
+  # with the deep-test results.
+  - stage: UpdateAISummaryComment
+    displayName: 'Post AI Summary Comment'
+    dependsOn:
+      - ReviewPR
+      - RunDeepUITests
+    # Run once RunDeepUITests has finished in any listed state, provided
+    # either ReviewPR emitted a comment id or the deep tests actually ran
+    # (i.e. were not skipped).
+    condition: and(in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed', 'Skipped'), or(ne(dependencies.ReviewPR.outputs['CopilotReview.RunReview.aiSummaryCommentId'], ''), in(dependencies.RunDeepUITests.result, 'Succeeded', 'SucceededWithIssues', 'Failed')))
+    jobs:
+      - job: UpdateComment
+        displayName: 'Post AI summary with review + deep test results'
+        # Job-level variables can use $[ stageDependencies... ] (cross-stage,
+        # job context). The stage condition above already gated emptiness;
+        # this just makes the value available as $(aiSummaryCommentId)
+        # inside the steps.
+        variables:
+          aiSummaryCommentId: $[ stageDependencies.ReviewPR.CopilotReview.outputs['RunReview.aiSummaryCommentId'] ]
+        pool:
+          name: Azure Pipelines
+          vmImage: ubuntu-22.04
+        timeoutInMinutes: 30
+        steps:
+          - checkout: self
+
+          - task: DownloadPipelineArtifact@2
+            displayName: 'Download CopilotLogs'
+            inputs:
+              buildType: 'current'
+              artifactName: 'CopilotLogs'
+              targetPath: '$(Pipeline.Workspace)/CopilotLogs'
+            # Continue if ReviewPR crashed before publishing CopilotLogs —
+            # the DEFERRED fallback can still post deep test results alone.
+            continueOnError: true
+
+          - task: DownloadPipelineArtifact@2
+            displayName: 'Download drop-deep-uitests'
+            inputs:
+              buildType: 'current'
+              artifactName: 'drop-deep-uitests'
+              targetPath: '$(Pipeline.Workspace)/drop-deep-uitests'
+            # Always attempt download — continueOnError handles the case where
+            # RunDeepUITests was skipped and no artifact exists. The previous
+            # condition-based skip using deepTestsRan was unreliable because
+            # AzDO's $[ in() ] expression can return unexpected values depending
+            # on stage result propagation timing.
+            continueOnError: true
+
+          # Builds the deep-test markdown block from the downloaded TRX files
+          # and then either posts a new comment (DEFERRED mode) or patches the
+          # comment whose id ReviewPR emitted (PATCH mode).
+          - pwsh: |
+              $ErrorActionPreference = 'Continue'
+              $artDir = "$(Pipeline.Workspace)/drop-deep-uitests"
+              $copilotLogsDir = "$(Pipeline.Workspace)/CopilotLogs"
+              $prNumber = "${{ parameters.PRNumber }}"
+              $commentId = "$(aiSummaryCommentId)"
+              $isDeferred = ($commentId -eq 'DEFERRED')
+
+              # Diagnostic logging for Stage 3 debugging
+              Write-Host "=== Stage 3 Diagnostics ===" -ForegroundColor Cyan
+              Write-Host "  commentId: '$commentId'"
+              Write-Host "  isDeferred: $isDeferred"
+              Write-Host "  artDir exists: $(Test-Path $artDir)"
+              Write-Host "  copilotLogsDir exists: $(Test-Path $copilotLogsDir)"
+              if (Test-Path $artDir) {
+                $trxCount = @(Get-ChildItem -Path $artDir -Filter "*.trx" -Recurse -ErrorAction SilentlyContinue).Count
+                Write-Host "  TRX files in artDir: $trxCount"
+                Get-ChildItem -Path $artDir -Recurse -ErrorAction SilentlyContinue | Select-Object -First 10 | ForEach-Object {
+                  Write-Host "    $($_.FullName.Substring($artDir.Length))" -ForegroundColor Gray
+                }
+              }
+
+              if ([string]::IsNullOrWhiteSpace($commentId)) {
+                # Reviewer crashed before posting the initial comment. If deep
+                # tests produced results, fall back to DEFERRED mode to post
+                # a degraded comment with test results only.
+                if (Test-Path $artDir) {
+                  Write-Host "No AI summary comment ID but deep test artifacts exist — falling back to DEFERRED mode"
+                  $commentId = 'DEFERRED'
+                  $isDeferred = $true
+                } else {
+                  Write-Host "No AI summary comment ID and no deep test artifacts — nothing to do"
+                  exit 0
+                }
+              }
+
+              # Aggregator returns @{ category -> @{ Total/Passed/Failed/.../Results } }
+              # using the SAME shape the in-process STEP 3 renderer expects
+              # so we can reuse the markdown generation pattern directly.
+              # Only the presence of this script is checked here; the
+              # aggregation itself goes through the dot-sourced functions below.
+              $aggScript = ".github/scripts/shared/Aggregate-UITestArtifacts.ps1"
+              if (-not (Test-Path $aggScript)) { throw "$aggScript missing" }
+
+              # Dot-source shared functions (no Invoke-Expression)
+              . .github/scripts/shared/Get-TrxResults.ps1
+              . .github/scripts/shared/Get-CategoryFromArtifactName.ps1
+              . .github/scripts/shared/Get-AggregatedTrxFromDirectory.ps1
+              # The artifact directory is absent when RunDeepUITests was
+              # skipped; do not hand a non-existent path to the aggregator.
+              $byCat = $null
+              if (Test-Path $artDir) {
+                $byCat = Get-AggregatedTrxFromDirectory -RootDir $artDir
+              }
+              if (-not $byCat -or $byCat.Count -eq 0) {
+                Write-Host "Aggregator returned no categories"
+                # No deep test results — but in DEFERRED mode we still need to
+                # post the review-only comment (without deep section).
+              }
+
+              $deepBlock = ''
+              if ($byCat -and $byCat.Count -gt 0) {
+
+                # Render the new STEP 3 section.
+                $totalPassed = 0; $totalFailed = 0
+                $sb = [System.Text.StringBuilder]::new()
+                [void]$sb.AppendLine()
+                [void]$sb.AppendLine("### 🧪 UI Test Execution Results (deep, platform pool)")
+                [void]$sb.AppendLine()
+                [void]$sb.AppendLine("| Category | Tests | Snapshot diffs |")
+                [void]$sb.AppendLine("|---|---|---|")
+                $perCategoryFailures = [ordered]@{}
+                foreach ($k in ($byCat.Keys | Sort-Object)) {
+                  $b = $byCat[$k]
+                  $totalPassed += [int]$b.Passed
+                  $totalFailed += [int]$b.Failed
+                  $tCount = [int]$b.Total
+                  $tPass = [int]$b.Passed
+                  $tFail = [int]$b.Failed
+                  $col = if ($tCount -eq 0) { '—' }
+                         elseif ($tFail -gt 0) { "$tPass/$tCount ($tFail ❌)" }
+                         else { "$tPass/$tCount ✓" }
+                  # Count snapshot-diff PNGs we shipped in this artifact subdir
+                  $catDir = Join-Path $artDir $b.ArtifactName
+                  $diffCount = 0
+                  if (Test-Path $catDir) {
+                    $diffCount = @(Get-ChildItem -Path $catDir -Filter "*-diff.png" -Recurse -ErrorAction SilentlyContinue).Count
+                  }
+                  $diffCol = if ($diffCount -gt 0) { "$diffCount diff PNG$(if ($diffCount -eq 1) {'' } else {'s'})" } else { '—' }
+                  [void]$sb.AppendLine("| ``$k`` | $col | $diffCol |")
+
+                  # Capture failed test entries from the parsed TRX so we can
+                  # render a per-category disclosure section listing the actual
+                  # failing test names + the first line of their error message.
+                  $catFailed = @()
+                  foreach ($r in @($b.Results)) {
+                    if ($r.status -eq 'Failed') {
+                      $catFailed += [pscustomobject]@{
+                        Name = $r.name
+                        Error = $r.error -as [string]
+                        Stack = $r.stack -as [string]
+                      }
+                    }
+                  }
+                  if ($catFailed.Count -gt 0) {
+                    $perCategoryFailures[$k] = $catFailed
+                  }
+                }
+
+                # Per-category failed-test disclosure sections (collapsed by
+                # default to keep the comment compact).
+                # NOTE(review): the AppendLine literals in this section carry no
+                # markup as seen here (several are just a line break) — they
+                # look like they lost their HTML tags somewhere upstream of
+                # this view; confirm against the real pipeline file.
+                if ($perCategoryFailures.Count -gt 0) {
+                  [void]$sb.AppendLine()
+                  foreach ($cat in $perCategoryFailures.Keys) {
+                    $items = $perCategoryFailures[$cat]
+                    [void]$sb.AppendLine("
+              $cat — $($items.Count) failed test$(if ($items.Count -eq 1) {''} else {'s'})")
+                    [void]$sb.AppendLine("
+              ")
+                    [void]$sb.AppendLine()
+                    # At most 30 entries per category; each entry's error +
+                    # stack text is capped at 1000 characters.
+                    foreach ($it in $items | Select-Object -First 30) {
+                      $errText = if (-not [string]::IsNullOrWhiteSpace($it.Error)) { $it.Error.Trim() } else { '' }
+                      $stackText = if (-not [string]::IsNullOrWhiteSpace($it.Stack)) { $it.Stack.Trim() } else { '' }
+                      $combined = $errText
+                      if ($stackText) { $combined = $combined + [Environment]::NewLine + $stackText }
+                      if ($combined.Length -gt 1000) { $combined = $combined.Substring(0, 1000) + [Environment]::NewLine + '...' }
+                      [void]$sb.AppendLine("
+              $($it.Name)")
+                      [void]$sb.AppendLine('
+              ')
+                      [void]$sb.AppendLine()
+                      if ($combined) {
+                        # Three backticks, built from char code 96.
+                        $fence = [string]::new([char]96, 3)
+                        [void]$sb.AppendLine($fence)
+                        [void]$sb.AppendLine($combined)
+                        [void]$sb.AppendLine($fence)
+                      }
+                      [void]$sb.AppendLine()
+                      [void]$sb.AppendLine("
+              ")
+                      [void]$sb.AppendLine()
+                    }
+                    if ($items.Count -gt 30) {
+                      [void]$sb.AppendLine("_(+$($items.Count - 30) more — see TRX in artifact)_")
+                      [void]$sb.AppendLine()
+                    }
+                    [void]$sb.AppendLine("
+              ")
+                    [void]$sb.AppendLine()
+                  }
+                }
+
+                # Link to the published artifact so reviewers can download the
+                # snapshot-diff PNGs to triage visual regressions.
+                $buildId = "$(Build.BuildId)"
+                $orgUri = "$(System.CollectionUri)".TrimEnd('/')
+                $project = "$(System.TeamProject)"
+                $artifactUrl = "$orgUri/$project/_build/results?buildId=$buildId&view=artifacts&pathAsName=false&type=publishedArtifacts"
+                [void]$sb.AppendLine("📎 [Download ``drop-deep-uitests`` artifact (TRX + snapshot diffs)]($artifactUrl)")
+                [void]$sb.AppendLine()
+
+                $resultIcon = if ($totalFailed -gt 0) { '❌' } elseif ($totalPassed -gt 0) { '✅' } else { '⏭️' }
+                $headerLine = "$resultIcon **Deep UI tests** — $totalPassed passed, $totalFailed failed across $($byCat.Count) categor$(if ($byCat.Count -eq 1) {'y'} else {'ies'}) on platform-pool agent (replaces in-process counts above)."
+
+                # NOTE(review): both markers are empty strings as seen here;
+                # presumably they are the DEEP_UITESTS_BEGIN / DEEP_UITESTS_END
+                # comment markers filtered on further down — confirm upstream.
+                $beginMarker = ''
+                $endMarker = ''
+                $deepBlock = "$beginMarker" + [Environment]::NewLine + "$headerLine" + [Environment]::NewLine + $sb.ToString() + "$endMarker"
+              } # end if ($byCat.Count -gt 0)
+
+              if ($isDeferred) {
+                # ── DEFERRED MODE: Post full comment with deep results included ──
+                # Guard against duplicate comments on pipeline retry: check if
+                # an AI Summary comment already exists for this PR.
+                # NOTE(review): the contains("") argument is empty as seen here;
+                # presumably it held the AI Summary marker — confirm upstream.
+                $existingComment = gh api "repos/dotnet/maui/issues/$prNumber/comments?per_page=100" --paginate --jq '.[] | select(.body | contains("")) | .id' 2>$null | Select-Object -Last 1
+                if ($existingComment) {
+                  Write-Host "Existing AI Summary comment found ($existingComment) — will PATCH instead of creating new"
+                  $commentId = $existingComment
+                  $isDeferred = $false
+                }
+              }
+
+              if ($isDeferred) {
+                # ── DEFERRED MODE (first run): Post full comment ──
+                # Find the PRAgent content dir from CopilotLogs artifact. The
+                # artifact is absent when ReviewPR crashed before publishing,
+                # so only search when the download directory exists.
+                $prAgentDir = $null
+                if (Test-Path $copilotLogsDir) {
+                  $prAgentDir = Get-ChildItem -Path $copilotLogsDir -Recurse -Directory -Filter "PRAgent" | Select-Object -First 1
+                }
+                if (-not $prAgentDir) {
+                  if (-not $deepBlock) {
+                    Write-Host "PRAgent directory not found in CopilotLogs and no deep results — nothing to post"
+                    exit 0
+                  }
+                  Write-Host "PRAgent directory not found in CopilotLogs — falling back to posting deep results only"
+                  # Actually perform the announced fallback: post the deep
+                  # block on its own. On a pipeline retry, reuse the comment
+                  # an earlier attempt created instead of adding a duplicate.
+                  $priorDeepOnly = gh api "repos/dotnet/maui/issues/$prNumber/comments?per_page=100" --paginate --jq '.[] | select(.body | contains("**Deep UI tests**")) | .id' 2>$null | Select-Object -Last 1
+                  $payload = New-TemporaryFile
+                  try {
+                    @{ body = $deepBlock } | ConvertTo-Json -Depth 4 -Compress | Set-Content $payload -Encoding UTF8
+                    if ($priorDeepOnly) {
+                      gh api -X PATCH "repos/dotnet/maui/issues/comments/$priorDeepOnly" --input $payload.FullName | Out-Null
+                    } else {
+                      gh api -X POST "repos/dotnet/maui/issues/$prNumber/comments" --input $payload.FullName | Out-Null
+                    }
+                    if ($LASTEXITCODE -eq 0) {
+                      Write-Host "✅ Posted deep UI test results only"
+                    } else {
+                      Write-Host "##vso[task.logissue type=warning]Failed to post deep-only comment (gh exit code $LASTEXITCODE)"
+                    }
+                  } finally {
+                    Remove-Item $payload -ErrorAction SilentlyContinue
+                  }
+                } else {
+                  # Replace in-process results with deep results in uitests/content.md (if available)
+                  if ($deepBlock) {
+                    $uitestContent = Join-Path $prAgentDir.FullName "uitests/content.md"
+                    if (Test-Path $uitestContent) {
+                      $existing = Get-Content $uitestContent -Raw
+                      # Strip in-process "SKIPPED" section — search for the header
+                      $idx = -1
+                      foreach ($marker in @('UI Test Execution Results', 'SKIPPED')) {
+                        $found = $existing.IndexOf($marker)
+                        # IndexOf returns -1 when absent; offset 0 is a valid hit.
+                        if ($found -ge 0) {
+                          # Back up to start of line
+                          $lineStart = $existing.LastIndexOf([char]10, $found)
+                          if ($lineStart -lt 0) { $lineStart = 0 } else { $lineStart++ }
+                          $idx = $lineStart
+                          break
+                        }
+                      }
+                      if ($idx -ge 0) {
+                        $existing = $existing.Substring(0, $idx).TrimEnd()
+                      }
+                      # Drop any marker lines left over from an earlier run.
+                      $existing = ($existing -split [Environment]::NewLine | Where-Object {
+                        $_ -notmatch 'DEEP_UITESTS_BEGIN|DEEP_UITESTS_END'
+                      }) -join [Environment]::NewLine
+                      ($existing.TrimEnd() + [Environment]::NewLine + [Environment]::NewLine + $deepBlock) | Set-Content $uitestContent -Encoding UTF8
+                      Write-Host "Replaced in-process results with deep results"
+                    }
+                  } else {
+                    Write-Host "No deep results — posting review-only comment"
+                  }
+
+                  # Copy PRAgent dir to expected location for post-ai-summary-comment.ps1
+                  $targetDir = "CustomAgentLogsTmp/PRState/$prNumber/PRAgent"
+                  New-Item -ItemType Directory -Force -Path (Split-Path -Parent $targetDir) | Out-Null
+                  Copy-Item -Path $prAgentDir.FullName -Destination $targetDir -Recurse -Force
+
+                  # Post the full comment
+                  $postScript = ".github/scripts/post-ai-summary-comment.ps1"
+                  if (Test-Path $postScript) {
+                    Write-Host "Posting full AI summary comment with deep results..."
+                    $output = & $postScript -PRNumber $prNumber
+                    $output | ForEach-Object { Write-Host $_ }
+                    Write-Host "✅ Full AI summary comment posted with deep results"
+                  }
+
+                  # Apply labels
+                  $labelScript = ".github/scripts/shared/Update-AgentLabels.ps1"
+                  if (Test-Path $labelScript) {
+                    try {
+                      . $labelScript
+                      Apply-AgentLabels -PRNumber $prNumber -RepoRoot (Get-Location).Path
+                      Write-Host "✅ Labels applied"
+                    } catch {
+                      Write-Host "⚠️ Label application failed: $_"
+                    }
+                  }
+                }
+              } else {
+                # ── PATCH MODE: Update existing comment with deep results ──
+                if (-not $deepBlock) {
+                  Write-Host "No deep results and comment already exists — nothing to patch"
+                  exit 0
+                }
+                $existing = (gh api "repos/dotnet/maui/issues/comments/$commentId" --jq '.body') -join [Environment]::NewLine
+                if ([string]::IsNullOrWhiteSpace($existing)) {
+                  Write-Host "Could not fetch comment body — aborting"
+                  exit 0
+                }
+
+                $beginIdx = $existing.IndexOf($beginMarker)
+                $endIdx = $existing.IndexOf($endMarker)
+                if ($beginIdx -ge 0 -and $endIdx -gt $beginIdx) {
+                  # A previous deep block is present: swap it out in place.
+                  $before = $existing.Substring(0, $beginIdx).TrimEnd()
+                  $after = $existing.Substring($endIdx + $endMarker.Length).TrimStart()
+                  $newBody = $before + ([Environment]::NewLine + [Environment]::NewLine) + $deepBlock + $(if ($after) { ([Environment]::NewLine + [Environment]::NewLine) + $after } else { "" })
+                } else {
+                  # No marked block: drop any stale deep header line, then
+                  # replace everything from the in-process results heading
+                  # onward (or append when that heading is absent).
+                  $cleaned = $existing -split [Environment]::NewLine | Where-Object {
+                    $_ -notmatch '^\s*[❌✅⏭️]\s*\*\*Deep UI tests\*\*'
+                  }
+                  $cleanedBody = ($cleaned -join [Environment]::NewLine)
+                  $legacyMarker = '### 🧪 UI Test Execution Results'
+                  $idx = $cleanedBody.IndexOf($legacyMarker)
+                  $newBody = if ($idx -ge 0) {
+                    $cleanedBody.Substring(0, $idx).TrimEnd() + ([Environment]::NewLine + [Environment]::NewLine) + $deepBlock
+                  } else {
+                    $cleanedBody.TrimEnd() + ([Environment]::NewLine + [Environment]::NewLine) + $deepBlock
+                  }
+                }
+
+                # Send the body through a temp file; report success only when
+                # gh exits cleanly, and always remove the temp file.
+                $tmp = New-TemporaryFile
+                try {
+                  @{ body = $newBody } | ConvertTo-Json -Depth 4 -Compress | Set-Content $tmp -Encoding UTF8
+                  gh api -X PATCH "repos/dotnet/maui/issues/comments/$commentId" --input $tmp.FullName | Out-Null
+                  if ($LASTEXITCODE -eq 0) {
+                    Write-Host "✅ Patched comment $commentId with deep UI test results ($totalPassed/$($totalPassed + $totalFailed))"
+                  } else {
+                    Write-Host "##vso[task.logissue type=warning]Failed to patch comment $commentId (gh exit code $LASTEXITCODE)"
+                  }
+                } finally {
+                  Remove-Item $tmp -ErrorAction SilentlyContinue
+                }
+              }
+            displayName: 'Post AI summary comment'
+            env:
+              GH_TOKEN: $(GH_COMMENT_TOKEN)
diff --git a/src/TestUtils/src/UITest.Appium/AppiumAndroidApp.cs b/src/TestUtils/src/UITest.Appium/AppiumAndroidApp.cs
index 13193cabdfae..8a68a23e00ac 100644
--- a/src/TestUtils/src/UITest.Appium/AppiumAndroidApp.cs
+++ b/src/TestUtils/src/UITest.Appium/AppiumAndroidApp.cs
@@ -122,6 +122,12 @@ private static AppiumOptions GetOptions(IConfig config)
 			// The animation scale will be restored automatically after the instrumentation process ends.
 			options.AddAdditionalAppiumOption("appium:disableWindowAnimation", true);
 
+			// On some emulator images (e.g. API 30 on hosted CI agents), the
+			// settings service may not fully support hidden_api_policy commands.
+			// This causes UiAutomator2 to throw "Can't find service: settings".
+			// Ignoring this non-critical error allows tests to proceed normally.
+			options.AddAdditionalAppiumOption("appium:ignoreHiddenApiPolicyError", true);
+
 			return options;
 		}
 	}