From c74587eb75e83817982379a16eecb8190a50d996 Mon Sep 17 00:00:00 2001 From: redth Date: Wed, 20 May 2026 10:15:06 -0400 Subject: [PATCH 1/5] Improve Windows DevFlow dialog automation Add native Windows UIA probing and action support for DevFlow agents so modal dialogs can be discovered and controlled without depending on the app UI dispatcher. Backport related Windows driver UIA helpers and add Windows dialog integration coverage. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Windows.WPF.Sample/Pages/AlertsPage.cs | 13 +- .../DevFlowAgentService.cs | 191 ++++- .../VisualTreeWalker.cs | 121 ++++ .../UiActionTests.cs | 41 ++ .../UiInspectionTests.cs | 52 ++ .../Microsoft.Maui.DevFlow.Agent.WPF.csproj | 4 + .../WpfAgentService.cs | 33 + .../WpfVisualTreeWalker.cs | 118 ++++ .../DevFlowAgentService.cs | 22 + .../Microsoft.Maui.DevFlow.Agent.csproj | 4 + .../VisualTreeWalker.cs | 120 ++++ .../Windows/NativeWindowProbe.cs | 625 +++++++++++++++++ .../AppDriverBase.cs | 6 +- .../AppDriverFactory.cs | 2 +- .../Windows/UIAutomationInterop.cs | 235 ++++++- .../WindowsAppDriver.cs | 660 +++++++++++++++--- 16 files changed, 2130 insertions(+), 117 deletions(-) create mode 100644 src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs diff --git a/platforms/Windows.WPF/samples/Windows.WPF.Sample/Pages/AlertsPage.cs b/platforms/Windows.WPF/samples/Windows.WPF.Sample/Pages/AlertsPage.cs index b502ad98e..517f0d3e1 100644 --- a/platforms/Windows.WPF/samples/Windows.WPF.Sample/Pages/AlertsPage.cs +++ b/platforms/Windows.WPF/samples/Windows.WPF.Sample/Pages/AlertsPage.cs @@ -16,13 +16,14 @@ public AlertsPage() var resultLabel = new Label { + AutomationId = "WpfDialogResultLabel", Text = "Result: (none yet)", FontSize = 14, TextColor = Colors.DodgerBlue, }; // Simple alert (OK only) - var simpleAlertBtn = new Button { Text = "Simple Alert (OK)" }; + var simpleAlertBtn = new Button { Text = "Simple Alert (OK)", AutomationId = "WpfSimpleAlertBtn" }; simpleAlertBtn.Clicked += async (s, e) => { resultLabel.Text = "Result: Alert requested..."; @@ -31,7 +32,7 @@ public AlertsPage() }; // Confirm alert (Accept / Cancel) - var confirmAlertBtn = new Button { Text = "Confirm Alert (Yes / No)" }; + var confirmAlertBtn = new Button { Text = "Confirm Alert (Yes / No)", AutomationId = "WpfConfirmAlertBtn" }; confirmAlertBtn.Clicked += async (s, e) => { bool answer = await ConnectedPage.DisplayAlertAsync("Confirm", "Do you want to proceed?", "Yes", "No"); @@ -39,7 +40,7 @@ public AlertsPage() }; // Action sheet - var actionSheetBtn = new Button { Text = "Action Sheet" }; + var actionSheetBtn = new Button { Text = "Action Sheet", AutomationId = "WpfActionSheetBtn" }; actionSheetBtn.Clicked += async (s, e) => { string action = await ConnectedPage.DisplayActionSheetAsync( @@ -51,7 +52,7 @@ public AlertsPage() }; // Action sheet without destruction - var actionSheet2Btn = new Button { Text = "Action Sheet (no destructive)" }; + var actionSheet2Btn = new Button { Text = "Action Sheet (no destructive)", AutomationId = "WpfActionSheetNoDestructiveBtn" }; actionSheet2Btn.Clicked += async (s, e) => { string action = await ConnectedPage.DisplayActionSheetAsync( @@ -63,7 +64,7 @@ public AlertsPage() }; // Prompt - var promptBtn = new Button { Text = "Text Prompt" }; + var promptBtn = new Button { Text = "Text Prompt", AutomationId = "WpfPromptBtn" }; promptBtn.Clicked += async (s, e) => { string name = await ConnectedPage.DisplayPromptAsync("Your Name", "What should we call you?", placeholder: "Enter name..."); @@ -71,7 +72,7 @@ public AlertsPage() }; // Prompt with initial value - var promptInitBtn = new Button { Text = "Prompt (with initial value)" }; + var promptInitBtn = new Button { Text = "Prompt (with initial value)", AutomationId = "WpfPromptInitialValueBtn" }; promptInitBtn.Clicked += async (s, e) => { string value = await ConnectedPage.DisplayPromptAsync("Edit Value", "Modify the text below:", initialValue: "Hello World"); diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs index 3a2bba220..911976076 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs @@ -57,6 +57,7 @@ public partial class DevFlowAgentService : IDisposable, IMarkerPublisher private int _uiHookGeneration = 1; private int _uiHookScanInFlight; private DateTime _lastUiHookScanTsUtc = DateTime.MinValue; + private const int NativeUiProbeTimeoutMs = 1500; private Shell? _hookedShell; private DateTime? _navigationStartedAtUtc; private string? _navigationTargetRoute; @@ -700,7 +701,10 @@ private async Task HandleTree(HttpRequest request) int.TryParse(depthStr, out maxDepth); var windowIndex = ParseWindowIndex(request); - var tree = await DispatchAsync(() => _treeWalker.WalkTree(_app, maxDepth, windowIndex)); + var tree = await CaptureUiOrNativeAsync( + () => _treeWalker.WalkTree(_app, maxDepth, windowIndex), + hwnds => _treeWalker.WalkNativeTree(hwnds, maxDepth), + windowIndex); return HttpResponse.Json(tree); } @@ -710,6 +714,12 @@ private async Task HandleElement(HttpRequest request) if (!request.RouteParams.TryGetValue("id", out var id)) return HttpResponse.Error("Element ID required"); + if (IsNativeElementId(id) && _treeWalker.SupportsNativeElements) + { + var nativeElement = await Task.Run(() => _treeWalker.GetNativeElementInfoById(id)); + return nativeElement != null ? HttpResponse.Json(nativeElement) : HttpResponse.NotFound($"Element '{id}' not found"); + } + var element = await DispatchAsync(() => { var el = _treeWalker.GetElementById(id, _app); @@ -735,7 +745,9 @@ private async Task HandleQuery(HttpRequest request) { try { - var results = await DispatchAsync(() => _treeWalker.QueryCss(_app, selector)); + var results = await CaptureUiOrNativeAsync( + () => _treeWalker.QueryCss(_app, selector), + hwnds => _treeWalker.QueryNative(hwnds, selector: selector)); return HttpResponse.Json(results); } catch (FormatException ex) @@ -751,10 +763,77 @@ private async Task HandleQuery(HttpRequest request) if (type == null && automationId == null && text == null) return HttpResponse.Error("At least one query parameter required: type, automationId, text, or selector"); - var simpleResults = await DispatchAsync(() => _treeWalker.Query(_app, type, automationId, text)); + var simpleResults = await CaptureUiOrNativeAsync( + () => _treeWalker.Query(_app, type, automationId, text), + hwnds => _treeWalker.QueryNative(hwnds, type, automationId, text)); return HttpResponse.Json(simpleResults); } + private async Task> CaptureUiOrNativeAsync( + Func> uiCallback, + Func, List> nativeCallback, + int? windowIndex = null) + { + if (!_treeWalker.SupportsNativeElements) + return await DispatchAsync(uiCallback); + + var hwndSource = new TaskCompletionSource>(TaskCreationOptions.RunContinuationsAsynchronously); + var uiTask = DispatchAsync(() => + { + try + { + hwndSource.TrySetResult(_app is null + ? Array.Empty() + : _treeWalker.GetKnownNativeWindowHandles(_app, windowIndex)); + } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Native HWND discovery failed: {ex.GetBaseException().Message}"); + hwndSource.TrySetResult(Array.Empty()); + } + + return uiCallback(); + }); + + var nativeTask = Task.Run(async () => + { + var winner = await Task.WhenAny(hwndSource.Task, Task.Delay(NativeUiProbeTimeoutMs)).ConfigureAwait(false); + var hwnds = winner == hwndSource.Task + ? await hwndSource.Task.ConfigureAwait(false) + : Array.Empty(); + + try + { + return nativeCallback(hwnds); + } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Native UI probe failed: {ex.GetBaseException().Message}"); + return []; + } + }); + + var uiWinner = await Task.WhenAny(uiTask, Task.Delay(NativeUiProbeTimeoutMs)).ConfigureAwait(false); + if (uiWinner != uiTask) + { + hwndSource.TrySetResult(Array.Empty()); + return await nativeTask.ConfigureAwait(false); + } + + var uiResult = await uiTask.ConfigureAwait(false); + var nativeResult = await nativeTask.ConfigureAwait(false); + if (nativeResult.Count == 0) + return uiResult; + + var merged = new List(uiResult.Count + nativeResult.Count); + merged.AddRange(uiResult); + merged.AddRange(nativeResult); + return merged; + } + + private static bool IsNativeElementId(string? elementId) + => elementId?.StartsWith("native:", StringComparison.Ordinal) == true; + private async Task HandleHitTest(HttpRequest request) { if (_app == null) return HttpResponse.Error("Agent not bound to app"); @@ -1429,6 +1508,19 @@ private async Task HandleTap(HttpRequest request) return HttpResponse.Error("elementId is required"); var startedAtUtc = DateTime.UtcNow; + if (IsNativeElementId(body.ElementId)) + { + var nativeResult = await Task.Run(() => _treeWalker.TryNativeElementTap(body.ElementId!)); + PublishUiOperationSpan( + "action.tap", + startedAtUtc, + nativeResult == "ok", + nativeResult == "ok" ? null : nativeResult, + body.ElementId); + + return nativeResult == "ok" ? HttpResponse.Ok("Tapped") : HttpResponse.Error(nativeResult); + } + var result = await DispatchAsync(() => { var el = _treeWalker.GetElementById(body.ElementId, _app); @@ -1437,10 +1529,14 @@ private async Task HandleTap(HttpRequest request) switch (el) { case Button btn: + if (TryScheduleNativeTapFirst(btn)) + return "ok"; try { btn.SendClicked(); } catch { if (btn is VisualElement ve && !TryNativeTap(ve)) return $"Native tap failed on Button"; } return "ok"; case ImageButton imgBtn: + if (TryScheduleNativeTapFirst(imgBtn)) + return "ok"; try { imgBtn.SendClicked(); } catch { if (imgBtn is VisualElement ve && !TryNativeTap(ve)) return $"Native tap failed on ImageButton"; } return "ok"; @@ -1645,6 +1741,15 @@ protected virtual bool TryNativeTap(VisualElement ve) return false; } + /// + /// Allows platforms whose native click handlers may open synchronous modal loops to schedule + /// a native tap before MAUI invokes the managed click event inline. + /// + protected virtual bool TryScheduleNativeTapFirst(VisualElement ve) + { + return false; + } + /// /// Attempts to tap a native platform view via handler for non-VisualElement IView types (e.g. Comet views). /// Uses reflection to get the PlatformView from the handler and invoke SendAccessibilityAction or performClick. @@ -1700,6 +1805,32 @@ private async Task HandleFill(HttpRequest request) return HttpResponse.Error("elementId and text are required"); var startedAtUtc = DateTime.UtcNow; + if (IsNativeElementId(body.ElementId)) + { + var nativeResult = await Task.Run(() => _treeWalker.TryNativeElementSetValue(body.ElementId!, body.Text!)); + PublishUiOperationSpan( + "action.fill", + startedAtUtc, + nativeResult == "ok", + nativeResult == "ok" ? null : nativeResult, + body.ElementId, + new { textLength = body.Text.Length }); + + if (nativeResult == "ok") + { + PublishUiEvent("treeChange", new + { + changeType = "modified", + elementId = body.ElementId, + elementType = "input", + parentId = (string?)null, + timestamp = DateTimeOffset.UtcNow.ToString("O") + }); + } + + return nativeResult == "ok" ? HttpResponse.Ok("Text set") : HttpResponse.Error(nativeResult); + } + var result = await DispatchAsync(() => { var el = _treeWalker.GetElementById(body.ElementId, _app); @@ -1756,6 +1887,32 @@ private async Task HandleClear(HttpRequest request) return HttpResponse.Error("elementId is required"); var startedAtUtc = DateTime.UtcNow; + if (IsNativeElementId(body.ElementId)) + { + var nativeResult = await Task.Run(() => _treeWalker.TryNativeElementSetValue(body.ElementId!, string.Empty)); + var nativeSuccess = nativeResult == "ok"; + PublishUiOperationSpan( + "action.clear", + startedAtUtc, + nativeSuccess, + nativeSuccess ? null : nativeResult, + body.ElementId); + + if (nativeSuccess) + { + PublishUiEvent("treeChange", new + { + changeType = "modified", + elementId = body.ElementId, + elementType = "input", + parentId = (string?)null, + timestamp = DateTimeOffset.UtcNow.ToString("O") + }); + } + + return nativeSuccess ? HttpResponse.Ok("Cleared") : HttpResponse.Error(nativeResult); + } + var success = await DispatchAsync(() => { var el = _treeWalker.GetElementById(body.ElementId, _app); @@ -1808,6 +1965,20 @@ private async Task HandleFocus(HttpRequest request) return HttpResponse.Error("elementId is required"); var startedAtUtc = DateTime.UtcNow; + if (IsNativeElementId(body.ElementId)) + { + var nativeResult = await Task.Run(() => _treeWalker.TryNativeElementFocus(body.ElementId!)); + var nativeSuccess = nativeResult == "ok"; + PublishUiOperationSpan( + "action.focus", + startedAtUtc, + nativeSuccess, + nativeSuccess ? null : nativeResult, + body.ElementId); + + return nativeSuccess ? HttpResponse.Ok("Focused") : HttpResponse.Error(nativeResult); + } + var success = await DispatchAsync(() => { var el = _treeWalker.GetElementById(body.ElementId, _app); @@ -2327,6 +2498,20 @@ private async Task HandleScroll(HttpRequest request) var position = ParseScrollToPosition(body.ScrollToPosition); var startedAtUtc = DateTime.UtcNow; + if (IsNativeElementId(body.ElementId)) + { + var nativeResult = await Task.Run(() => _treeWalker.TryNativeElementScroll(body.ElementId!, body.DeltaX, body.DeltaY)); + PublishUiOperationSpan( + "action.scroll", + startedAtUtc, + nativeResult == "ok", + nativeResult == "ok" ? null : nativeResult, + body.ElementId, + new { body.DeltaX, body.DeltaY, body.Animated }); + + return nativeResult == "ok" ? HttpResponse.Ok("Scrolled") : HttpResponse.Error(nativeResult); + } + var result = await DispatchAsync(async () => { // Priority 1: Scroll by item index on a specific ItemsView diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/VisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/VisualTreeWalker.cs index ed07bc307..33d65167e 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/VisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/VisualTreeWalker.cs @@ -320,6 +320,127 @@ public List WalkTree(Application app, int maxDepth = 0, int? window return results; } + /// + /// Indicates whether this walker can discover native platform elements that are not + /// represented in the MAUI visual tree. + /// + public virtual bool SupportsNativeElements => false; + + /// + /// Returns native top-level window handles already represented by the MAUI tree so + /// native probes can avoid duplicating them. + /// + public virtual IReadOnlyList GetKnownNativeWindowHandles(Application app, int? windowIndex = null) + => Array.Empty(); + + /// + /// Captures native platform elements, typically on a worker thread. + /// + public virtual List WalkNativeTree(IReadOnlyList knownWindowHandles, int maxDepth = 0) + => []; + + /// + /// Resolves a native platform object by DevFlow element id. + /// + public virtual object? GetNativeElementById(string id) + => null; + + /// + /// Resolves native element details by DevFlow element id. + /// + public virtual ElementInfo? GetNativeElementInfoById(string id) + => FlattenElementInfos(WalkNativeTree(Array.Empty())) + .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); + + /// + /// Queries native platform elements using the same filter shape as MAUI tree queries. + /// + public virtual List QueryNative( + IReadOnlyList knownWindowHandles, + string? type = null, + string? automationId = null, + string? text = null, + string? selector = null) + { + var tree = WalkNativeTree(knownWindowHandles); + if (!string.IsNullOrWhiteSpace(selector)) + { + return CssSelectorEngine.Query(tree, selector) + .Where(e => MatchesElementInfo(e, type, automationId, text)) + .ToList(); + } + + return FlattenElementInfos(tree) + .Where(e => MatchesElementInfo(e, type, automationId, text)) + .OrderByDescending(e => e.AutomationId is not null) + .ThenByDescending(e => e.Traits?.Contains("actionable") == true) + .ThenBy(e => e.Id, StringComparer.Ordinal) + .ToList(); + } + + /// + /// Performs a native tap/invoke action for a native element id. + /// + public virtual string TryNativeElementTap(string elementId) + => "Native element actions are not supported on this platform"; + + /// + /// Sets native element text/value for a native element id. + /// + public virtual string TryNativeElementSetValue(string elementId, string value) + => "Native element value actions are not supported on this platform"; + + /// + /// Sets native keyboard focus for a native element id. + /// + public virtual string TryNativeElementFocus(string elementId) + => "Native element focus is not supported on this platform"; + + /// + /// Scrolls or scrolls into view a native element id. + /// + public virtual string TryNativeElementScroll(string elementId, double deltaX, double deltaY) + => "Native element scrolling is not supported on this platform"; + + public static IEnumerable FlattenElementInfos(IEnumerable roots) + { + foreach (var root in roots) + { + yield return root; + if (root.Children is not null) + { + foreach (var child in FlattenElementInfos(root.Children)) + yield return child; + } + } + } + + private static bool MatchesElementInfo(ElementInfo info, string? type, string? automationId, string? text) + { + if (!string.IsNullOrWhiteSpace(type) && + !info.Type.Equals(type, StringComparison.OrdinalIgnoreCase) && + !(info.FullType?.EndsWith(type, StringComparison.OrdinalIgnoreCase) == true)) + { + return false; + } + + if (!string.IsNullOrWhiteSpace(automationId) && + !string.Equals(info.AutomationId, automationId, StringComparison.OrdinalIgnoreCase) && + !info.Id.Equals(automationId, StringComparison.OrdinalIgnoreCase)) + { + return false; + } + + if (!string.IsNullOrWhiteSpace(text) && + !(info.Text?.Contains(text, StringComparison.OrdinalIgnoreCase) == true) && + !(info.Value?.Contains(text, StringComparison.OrdinalIgnoreCase) == true)) + { + return false; + } + + return true; + } + /// /// Walks from a specific element. /// diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiActionTests.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiActionTests.cs index 52563722d..701d3af35 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiActionTests.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiActionTests.cs @@ -1,5 +1,6 @@ using System.Net; using Microsoft.Maui.DevFlow.Agent.IntegrationTests.Fixtures; +using Microsoft.Maui.DevFlow.Driver; using Xunit.Abstractions; namespace Microsoft.Maui.DevFlow.Agent.IntegrationTests; @@ -330,6 +331,31 @@ public async Task Fill_MultipleEntries_SetsAllText() await Client.ClearAsync(descEntry.Id); } + [Fact] + public async Task Tap_WindowsNativeAlertButton_DismissesDialog() + { + if (!Platform.Equals("windows", StringComparison.OrdinalIgnoreCase)) + { + Output.WriteLine("Windows native dialog action test skipped on non-Windows platform."); + return; + } + + await NavigateToPageAsync("//dialogs", "AlertOkOnlyBtn"); + + var trigger = await FindElementAsync("AlertOkOnlyBtn"); + var triggered = await Client.TapAsync(trigger.Id).WaitAsync(TimeSpan.FromSeconds(5)); + Assert.True(triggered); + + var okButton = await WaitForNativeButtonAsync("OK"); + Assert.True(await Client.TapAsync(okButton.Id)); + + await WaitForAsync(async () => + { + var status = await FindElementAsync("DialogStatusLabel"); + return status.Text?.Contains("OK dismissed", StringComparison.OrdinalIgnoreCase) == true; + }, timeoutMs: 5000); + } + async Task CleanupAddedTodoAsync(string todoTitle) { try @@ -347,4 +373,19 @@ async Task CleanupAddedTodoAsync(string todoTitle) Output.WriteLine($"Cleanup warning: Could not delete todo '{todoTitle}': {ex.Message}"); } } + + async Task WaitForNativeButtonAsync(string text) + { + ElementInfo? match = null; + await WaitForAsync(async () => + { + var buttons = await Client.QueryAsync(type: "Button", text: text); + match = buttons.FirstOrDefault(e => + e.Id.StartsWith("native:", StringComparison.Ordinal) && + string.Equals(e.Text, text, StringComparison.OrdinalIgnoreCase)); + return match is not null; + }, timeoutMs: 5000); + + return match!; + } } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiInspectionTests.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiInspectionTests.cs index 01c94ca3d..d510dbab4 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiInspectionTests.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.IntegrationTests/UiInspectionTests.cs @@ -214,4 +214,56 @@ public async Task Screenshot_OfElement_ReturnsImage() Assert.True(bytes.Length > 0); } + + [Fact] + public async Task Tree_WindowsNativeDialog_IncludesNativeElements() + { + if (!Platform.Equals("windows", StringComparison.OrdinalIgnoreCase)) + { + Output.WriteLine("Windows native dialog inspection test skipped on non-Windows platform."); + return; + } + + await NavigateToPageAsync("//dialogs", "AlertOkOnlyBtn"); + + var trigger = await FindElementAsync("AlertOkOnlyBtn"); + Assert.True(await Client.TapAsync(trigger.Id).WaitAsync(TimeSpan.FromSeconds(5))); + + var okButton = await WaitForNativeButtonAsync("OK"); + var tree = await Client.GetTreeAsync(maxDepth: 8); + var flattened = Flatten(tree).ToList(); + + Assert.Contains(flattened, e => e.Id.StartsWith("native:", StringComparison.Ordinal)); + Assert.Contains(flattened, e => e.Id.StartsWith("native:", StringComparison.Ordinal) && e.Traits?.Contains("dialog") == true); + + Assert.True(await Client.TapAsync(okButton.Id)); + } + + async Task WaitForNativeButtonAsync(string text) + { + ElementInfo? match = null; + await WaitForAsync(async () => + { + var buttons = await Client.QueryAsync(type: "Button", text: text); + match = buttons.FirstOrDefault(e => + e.Id.StartsWith("native:", StringComparison.Ordinal) && + string.Equals(e.Text, text, StringComparison.OrdinalIgnoreCase)); + return match is not null; + }, timeoutMs: 5000); + + return match!; + } + + static IEnumerable Flatten(IEnumerable elements) + { + foreach (var element in elements) + { + yield return element; + if (element.Children is not null) + { + foreach (var child in Flatten(element.Children)) + yield return child; + } + } + } } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/Microsoft.Maui.DevFlow.Agent.WPF.csproj b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/Microsoft.Maui.DevFlow.Agent.WPF.csproj index fdbc8b0ab..72acee860 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/Microsoft.Maui.DevFlow.Agent.WPF.csproj +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/Microsoft.Maui.DevFlow.Agent.WPF.csproj @@ -20,4 +20,8 @@ + + + + diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs index 94392d0f8..119a07a6c 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs @@ -135,6 +135,39 @@ protected override bool TryNativeTap(VisualElement ve) return false; } + protected override bool TryScheduleNativeTapFirst(VisualElement ve) + { + try + { + if (ve.Handler?.PlatformView is not ButtonBase buttonBase) + return false; + + var peer = System.Windows.Automation.Peers.UIElementAutomationPeer.FromElement(buttonBase) + ?? System.Windows.Automation.Peers.UIElementAutomationPeer.CreatePeerForElement(buttonBase); + + if (peer?.GetPattern(System.Windows.Automation.Peers.PatternInterface.Invoke) + is System.Windows.Automation.Provider.IInvokeProvider invoke) + { + buttonBase.Dispatcher.BeginInvoke(() => invoke.Invoke()); + return true; + } + + if (peer?.GetPattern(System.Windows.Automation.Peers.PatternInterface.Toggle) + is System.Windows.Automation.Provider.IToggleProvider toggle) + { + buttonBase.Dispatcher.BeginInvoke(() => toggle.Toggle()); + return true; + } + + buttonBase.Dispatcher.BeginInvoke(() => + buttonBase.RaiseEvent(new System.Windows.RoutedEventArgs(ButtonBase.ClickEvent, buttonBase))); + return true; + } + catch { } + + return false; + } + protected override async Task CaptureElementScreenshotAsync(VisualElement element) { try diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs index 7eba8297f..3d9ececb1 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs @@ -2,9 +2,11 @@ using System.Windows; using System.Windows.Controls; using System.Windows.Controls.Primitives; +using System.Windows.Interop; using System.Windows.Media; using Microsoft.Maui.Controls; using Microsoft.Maui.DevFlow.Agent.Core; +using Microsoft.Maui.DevFlow.Agent.Windows; namespace Microsoft.Maui.DevFlow.Agent.WPF; @@ -13,6 +15,10 @@ namespace Microsoft.Maui.DevFlow.Agent.WPF; /// public class WpfVisualTreeWalker : VisualTreeWalker { + private readonly NativeWindowProbe _nativeProbe = new(); + private readonly object _nativeObjectsLock = new(); + private Dictionary _nativeObjects = new(StringComparer.OrdinalIgnoreCase); + protected override BoundsInfo? ResolveWindowBounds(VisualElement ve) { try @@ -149,4 +155,116 @@ protected override void PopulateNativeInfo(ElementInfo info, VisualElement ve) catch { } return null; } + + public override bool SupportsNativeElements => true; + + public override IReadOnlyList GetKnownNativeWindowHandles(Microsoft.Maui.Controls.Application app, int? windowIndex = null) + { + var handles = new List(); + + if (windowIndex is not null) + { + var window = windowIndex.Value >= 0 && windowIndex.Value < app.Windows.Count + ? app.Windows[windowIndex.Value] + : null; + var handle = GetWindowHandle(window); + if (handle != IntPtr.Zero) + handles.Add(handle); + return handles; + } + + foreach (var window in app.Windows) + { + var handle = GetWindowHandle(window); + if (handle != IntPtr.Zero) + handles.Add(handle); + } + + return handles; + } + + public override List WalkNativeTree(IReadOnlyList knownWindowHandles, int maxDepth = 0) + { + var roots = new List(); + var nativeObjects = new Dictionary(StringComparer.OrdinalIgnoreCase); + _nativeProbe.AppendNativeWindows(roots, nativeObjects, knownWindowHandles, maxDepth); + + lock (_nativeObjectsLock) + _nativeObjects = nativeObjects; + + return roots; + } + + public override object? GetNativeElementById(string id) + { + lock (_nativeObjectsLock) + { + if (NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id) is { } cached) + return cached; + } + + WalkNativeTree(Array.Empty()); + lock (_nativeObjectsLock) + return NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id); + } + + public override ElementInfo? GetNativeElementInfoById(string id) + => FlattenElementInfos(WalkNativeTree(Array.Empty())) + .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); + + public override string TryNativeElementTap(string elementId) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryInvoke(element) + ? "ok" + : $"Native element '{elementId}' does not support invoke, toggle, selection, or expand/collapse"; + } + + public override string TryNativeElementSetValue(string elementId, string value) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TrySetValue(element, value) + ? "ok" + : $"Native element '{elementId}' does not support writable value"; + } + + public override string TryNativeElementFocus(string elementId) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryFocus(element) + ? "ok" + : $"Native element '{elementId}' could not be focused"; + } + + public override string TryNativeElementScroll(string elementId, double deltaX, double deltaY) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryScroll(element, deltaX, deltaY) + ? "ok" + : $"Native element '{elementId}' does not support scrolling"; + } + + private System.Windows.Automation.AutomationElement? GetNativeAutomationElement(string id) + => GetNativeElementById(id) as System.Windows.Automation.AutomationElement; + + private static IntPtr GetWindowHandle(Microsoft.Maui.Controls.Window? window) + { + if (window?.Handler?.PlatformView is not FrameworkElement frameworkElement) + return IntPtr.Zero; + + var nativeWindow = System.Windows.Window.GetWindow(frameworkElement); + return nativeWindow is null ? IntPtr.Zero : new WindowInteropHelper(nativeWindow).Handle; + } } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs index 9a418ce65..b6cdf275f 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs @@ -530,6 +530,28 @@ protected override bool TryNativeTap(VisualElement ve) return false; } + protected override bool TryScheduleNativeTapFirst(VisualElement ve) + { + try + { +#if WINDOWS + if (ve.Handler?.PlatformView is Microsoft.UI.Xaml.Controls.Primitives.ButtonBase buttonBase) + { + var peer = + Microsoft.UI.Xaml.Automation.Peers.FrameworkElementAutomationPeer.FromElement(buttonBase) ?? + Microsoft.UI.Xaml.Automation.Peers.FrameworkElementAutomationPeer.CreatePeerForElement(buttonBase); + if (peer?.GetPattern(Microsoft.UI.Xaml.Automation.Peers.PatternInterface.Invoke) is Microsoft.UI.Xaml.Automation.Provider.IInvokeProvider invokeProvider) + { + return buttonBase.DispatcherQueue.TryEnqueue(() => invokeProvider.Invoke()); + } + } +#endif + } + catch { } + + return false; + } + #if MACOS protected override async Task CaptureScreenshotAsync(VisualElement rootElement) { diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Microsoft.Maui.DevFlow.Agent.csproj b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Microsoft.Maui.DevFlow.Agent.csproj index b2e64612f..afa35cefc 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Microsoft.Maui.DevFlow.Agent.csproj +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Microsoft.Maui.DevFlow.Agent.csproj @@ -18,6 +18,10 @@ + + + + diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs index 39a2d2510..5db121bec 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs @@ -6,6 +6,9 @@ #if MACOS using AppKit; #endif +#if WINDOWS +using Microsoft.Maui.DevFlow.Agent.Windows; +#endif namespace Microsoft.Maui.DevFlow.Agent; @@ -15,6 +18,12 @@ namespace Microsoft.Maui.DevFlow.Agent; /// public class PlatformVisualTreeWalker : VisualTreeWalker { +#if WINDOWS + private readonly NativeWindowProbe _nativeProbe = new(); + private readonly object _nativeObjectsLock = new(); + private Dictionary _nativeObjects = new(StringComparer.OrdinalIgnoreCase); +#endif + protected override void PopulateNativeInfo(ElementInfo info, VisualElement ve) { try @@ -609,6 +618,117 @@ private void PopulateNativeInfoAndroid(ElementInfo info, object marker) } #if WINDOWS + public override bool SupportsNativeElements => true; + + public override IReadOnlyList GetKnownNativeWindowHandles(Application app, int? windowIndex = null) + { + var handles = new List(); + + if (windowIndex is not null) + { + var window = windowIndex.Value >= 0 && windowIndex.Value < app.Windows.Count + ? app.Windows[windowIndex.Value] + : null; + var handle = GetWindowHandle(window); + if (handle != IntPtr.Zero) + handles.Add(handle); + return handles; + } + + foreach (var window in app.Windows) + { + var handle = GetWindowHandle(window); + if (handle != IntPtr.Zero) + handles.Add(handle); + } + + return handles; + } + + public override List WalkNativeTree(IReadOnlyList knownWindowHandles, int maxDepth = 0) + { + var roots = new List(); + var nativeObjects = new Dictionary(StringComparer.OrdinalIgnoreCase); + _nativeProbe.AppendNativeWindows(roots, nativeObjects, knownWindowHandles, maxDepth); + + lock (_nativeObjectsLock) + _nativeObjects = nativeObjects; + + return roots; + } + + public override object? GetNativeElementById(string id) + { + lock (_nativeObjectsLock) + { + if (NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id) is { } cached) + return cached; + } + + WalkNativeTree(Array.Empty()); + lock (_nativeObjectsLock) + return NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id); + } + + public override ElementInfo? GetNativeElementInfoById(string id) + => FlattenElementInfos(WalkNativeTree(Array.Empty())) + .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); + + public override string TryNativeElementTap(string elementId) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryInvoke(element) + ? "ok" + : $"Native element '{elementId}' does not support invoke, toggle, selection, or expand/collapse"; + } + + public override string TryNativeElementSetValue(string elementId, string value) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TrySetValue(element, value) + ? "ok" + : $"Native element '{elementId}' does not support writable value"; + } + + public override string TryNativeElementFocus(string elementId) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryFocus(element) + ? "ok" + : $"Native element '{elementId}' could not be focused"; + } + + public override string TryNativeElementScroll(string elementId, double deltaX, double deltaY) + { + var element = GetNativeAutomationElement(elementId); + if (element is null) + return $"Native element '{elementId}' was not found"; + + return NativeWindowProbe.TryScroll(element, deltaX, deltaY) + ? "ok" + : $"Native element '{elementId}' does not support scrolling"; + } + + private System.Windows.Automation.AutomationElement? GetNativeAutomationElement(string id) + => GetNativeElementById(id) as System.Windows.Automation.AutomationElement; + + private static IntPtr GetWindowHandle(Microsoft.Maui.Controls.Window? window) + { + if (window?.Handler?.PlatformView is Microsoft.UI.Xaml.Window nativeWindow) + return WinRT.Interop.WindowNative.GetWindowHandle(nativeWindow); + + return IntPtr.Zero; + } + private BoundsInfo? ResolveBoundsWindows(object marker) { // Windows NavigationView doesn't expose easily queryable sub-parts diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs new file mode 100644 index 000000000..9a83b0796 --- /dev/null +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs @@ -0,0 +1,625 @@ +#if WINDOWS +using System.Runtime.InteropServices; +using System.Windows.Automation; +using Microsoft.Maui.DevFlow.Agent.Core; + +namespace Microsoft.Maui.DevFlow.Agent.Windows; + +/// +/// Discovers native Windows UI Automation elements that are not reliably represented +/// in the MAUI visual tree, including modal dialogs and dialog-like popups. +/// +public sealed class NativeWindowProbe +{ + private const int DefaultMaxDepth = 10; + private const int MaxNodesPerWindow = 256; + private static readonly int CurrentProcessId = Environment.ProcessId; + + private static readonly HashSet CommonDialogButtonLabels = new(StringComparer.OrdinalIgnoreCase) + { + "OK", "CANCEL", "YES", "NO", "CLOSE", "DISMISS", "RETRY", "ABORT", "IGNORE", "CONTINUE", "ALLOW", "DON'T ALLOW", "DELETE", "KEEP" + }; + + public void AppendNativeWindows( + List roots, + Dictionary nativeObjects, + IEnumerable knownHwnds, + int? maxDepth = null) + { + var known = knownHwnds.Where(h => h != IntPtr.Zero).Distinct().ToArray(); + AppendKnownWindowDialogSubtrees(roots, nativeObjects, known, maxDepth); + AppendForeignTopLevelWindows(roots, nativeObjects, known, maxDepth); + } + + public void AppendForeignTopLevelWindows( + List roots, + Dictionary nativeObjects, + IEnumerable knownHwnds, + int? maxDepth = null) + { + var depth = maxDepth is > 0 ? maxDepth.Value : DefaultMaxDepth; + var known = new HashSet(knownHwnds.Select(h => h.ToInt64())); + IReadOnlyList windows; + try + { + windows = EnumerateProcessTopLevels(); + } + catch (Exception ex) when (ex is COMException or InvalidOperationException or ElementNotAvailableException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] NativeWindowProbe enumeration failed: {ex.Message}"); + return; + } + + var rootIndex = roots.Count; + foreach (var window in windows) + { + IntPtr hwnd; + try + { + hwnd = new IntPtr(window.Current.NativeWindowHandle); + } + catch (ElementNotAvailableException) + { + continue; + } + + if (hwnd == IntPtr.Zero || known.Contains(hwnd.ToInt64())) + continue; + + var prefix = $"native:hwnd:0x{hwnd.ToInt64():X}"; + var info = WalkAutomationElement(window, prefix, [rootIndex++], nativeObjects, 0, depth, isRoot: true); + if (info is null) + continue; + + info.Traits ??= []; + if (!info.Traits.Contains("dialog")) + info.Traits.Add("dialog"); + roots.Add(info); + } + } + + public static AutomationElement? TryGetAutomationElement(IReadOnlyDictionary nativeObjects, string id) + => nativeObjects.TryGetValue(id, out var native) && native is AutomationElement element ? element : null; + + public static bool TryInvoke(AutomationElement element) + { + try + { + if (element.TryGetCurrentPattern(InvokePattern.Pattern, out var invokePattern) && invokePattern is InvokePattern invoke) + { + invoke.Invoke(); + return true; + } + + if (element.TryGetCurrentPattern(TogglePattern.Pattern, out var togglePattern) && togglePattern is TogglePattern toggle) + { + toggle.Toggle(); + return true; + } + + if (element.TryGetCurrentPattern(SelectionItemPattern.Pattern, out var selectionPattern) && selectionPattern is SelectionItemPattern selection) + { + selection.Select(); + return true; + } + + if (element.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out var expandPattern) && expandPattern is ExpandCollapsePattern expand) + { + if (expand.Current.ExpandCollapseState == ExpandCollapseState.Collapsed) + expand.Expand(); + else + expand.Collapse(); + return true; + } + } + catch (Exception ex) when (ex is ElementNotEnabledException or ElementNotAvailableException or InvalidOperationException or COMException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] NativeWindowProbe.TryInvoke failed: {ex.Message}"); + } + + return false; + } + + public static bool TrySetValue(AutomationElement element, string value) + { + try + { + if (element.TryGetCurrentPattern(ValuePattern.Pattern, out var pattern) && + pattern is ValuePattern valuePattern && + !valuePattern.Current.IsReadOnly) + { + valuePattern.SetValue(value); + return true; + } + } + catch (Exception ex) when (ex is ElementNotEnabledException or ElementNotAvailableException or InvalidOperationException or COMException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] NativeWindowProbe.TrySetValue failed: {ex.Message}"); + } + + return false; + } + + public static bool TryFocus(AutomationElement element) + { + try + { + element.SetFocus(); + return true; + } + catch (Exception ex) when (ex is ElementNotEnabledException or ElementNotAvailableException or InvalidOperationException or COMException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] NativeWindowProbe.TryFocus failed: {ex.Message}"); + } + + return false; + } + + public static bool TryScroll(AutomationElement element, double deltaX, double deltaY) + { + try + { + if ((deltaX != 0 || deltaY != 0) && + element.TryGetCurrentPattern(ScrollPattern.Pattern, out var scrollPattern) && + scrollPattern is ScrollPattern scroll) + { + scroll.Scroll(ToScrollAmount(deltaX), ToScrollAmount(deltaY)); + return true; + } + + if (element.TryGetCurrentPattern(ScrollItemPattern.Pattern, out var itemPattern) && + itemPattern is ScrollItemPattern item) + { + item.ScrollIntoView(); + return true; + } + } + catch (Exception ex) when (ex is ElementNotEnabledException or ElementNotAvailableException or InvalidOperationException or COMException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] NativeWindowProbe.TryScroll failed: {ex.Message}"); + } + + return false; + } + + private static ScrollAmount ToScrollAmount(double delta) + { + if (delta > 0) + return ScrollAmount.LargeIncrement; + if (delta < 0) + return ScrollAmount.LargeDecrement; + return ScrollAmount.NoAmount; + } + + private void AppendKnownWindowDialogSubtrees( + List roots, + Dictionary nativeObjects, + IReadOnlyList knownHwnds, + int? maxDepth) + { + var depth = maxDepth is > 0 ? maxDepth.Value : DefaultMaxDepth; + var rootIndex = roots.Count; + + foreach (var hwnd in knownHwnds) + { + AutomationElement? root; + try + { + root = AutomationElement.FromHandle(hwnd); + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException or ArgumentException) + { + continue; + } + + if (root is null) + continue; + + var dialogIndex = 0; + foreach (var candidate in FindDialogCandidates(root, hwnd)) + { + var prefix = $"native:hwnd:0x{hwnd.ToInt64():X}:dialog:{dialogIndex++}"; + var info = WalkAutomationElement(candidate, prefix, [rootIndex++], nativeObjects, 0, depth, isRoot: true); + if (info is null) + continue; + + info.Traits ??= []; + if (!info.Traits.Contains("dialog")) + info.Traits.Add("dialog"); + roots.Add(info); + } + } + } + + private static IReadOnlyList FindDialogCandidates(AutomationElement root, IntPtr rootHwnd) + { + var candidates = new List(); + AutomationElementCollection descendants; + try + { + descendants = root.FindAll(TreeScope.Descendants, System.Windows.Automation.Condition.TrueCondition); + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException) + { + return candidates; + } + + for (var i = 0; i < descendants.Count; i++) + { + var element = descendants[i]; + if (element is null || !IsDialogCandidate(element, rootHwnd)) + continue; + + if (candidates.Any(existing => IsAncestor(existing, element))) + continue; + + candidates.RemoveAll(existing => IsAncestor(element, existing)); + candidates.Add(element); + } + + return candidates; + } + + private static bool IsDialogCandidate(AutomationElement element, IntPtr rootHwnd) + { + AutomationElement.AutomationElementInformation current; + try + { + current = element.Current; + } + catch (ElementNotAvailableException) + { + return false; + } + + if (current.NativeWindowHandle != 0 && current.NativeWindowHandle == rootHwnd.ToInt64()) + return false; + + if (TryGetIsModal(element) == true) + return true; + + var className = current.ClassName ?? string.Empty; + var localizedType = current.ControlType?.LocalizedControlType ?? string.Empty; + var name = current.Name ?? string.Empty; + var looksDialogLike = + className.Contains("Dialog", StringComparison.OrdinalIgnoreCase) || + localizedType.Contains("dialog", StringComparison.OrdinalIgnoreCase) || + name.Contains("dialog", StringComparison.OrdinalIgnoreCase) || + name.Contains("alert", StringComparison.OrdinalIgnoreCase); + + return looksDialogLike && HasCommonDialogButton(element); + } + + private static bool IsAncestor(AutomationElement ancestor, AutomationElement descendant) + { + var walker = TreeWalker.RawViewWalker; + AutomationElement? current; + try + { + current = walker.GetParent(descendant); + } + catch (ElementNotAvailableException) + { + return false; + } + + while (current is not null) + { + if (SameElement(current, ancestor)) + return true; + + try + { + current = walker.GetParent(current); + } + catch (ElementNotAvailableException) + { + return false; + } + } + + return false; + } + + private static bool SameElement(AutomationElement first, AutomationElement second) + { + try + { + return first.Equals(second) || first.GetRuntimeId().SequenceEqual(second.GetRuntimeId()); + } + catch (ElementNotAvailableException) + { + return false; + } + } + + private static bool HasCommonDialogButton(AutomationElement root) + { + try + { + var buttons = root.FindAll( + TreeScope.Descendants, + new System.Windows.Automation.PropertyCondition(AutomationElement.ControlTypeProperty, ControlType.Button)); + + for (var i = 0; i < buttons.Count; i++) + { + var name = buttons[i]?.Current.Name; + if (!string.IsNullOrWhiteSpace(name) && + CommonDialogButtonLabels.Contains(NormalizeLabel(name))) + { + return true; + } + } + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException) + { + } + + return false; + } + + private static IReadOnlyList EnumerateProcessTopLevels() + { + var hwnds = new List(); + EnumWindows((hwnd, _) => + { + if (!IsWindowVisible(hwnd)) + return true; + + GetWindowThreadProcessId(hwnd, out var pid); + if ((int)pid == CurrentProcessId) + hwnds.Add(hwnd); + + return true; + }, IntPtr.Zero); + + var result = new List(hwnds.Count); + foreach (var hwnd in hwnds) + { + AutomationElement? element; + try + { + element = AutomationElement.FromHandle(hwnd); + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException or ArgumentException) + { + continue; + } + + if (element is not null) + result.Add(element); + } + + return result; + } + + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool EnumWindows(EnumWindowsProc lpEnumFunc, IntPtr lParam); + + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool IsWindowVisible(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true)] + private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint processId); + + private delegate bool EnumWindowsProc(IntPtr hwnd, IntPtr lParam); + + private static ElementInfo? WalkAutomationElement( + AutomationElement element, + string prefix, + IReadOnlyList path, + Dictionary nativeObjects, + int depth, + int maxDepth, + bool isRoot) + { + if (depth > maxDepth) + return null; + + AutomationElement.AutomationElementInformation current; + try + { + current = element.Current; + } + catch (ElementNotAvailableException) + { + return null; + } + + var id = BuildId(current, prefix, path); + nativeObjects[id] = element; + var info = Map(element, current, id, isRoot); + + AutomationElementCollection children; + try + { + children = element.FindAll(TreeScope.Children, System.Windows.Automation.Condition.TrueCondition); + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException) + { + return info; + } + + var index = 0; + for (var i = 0; i < children.Count; i++) + { + if (info.Children?.Count >= MaxNodesPerWindow) + break; + + var child = children[i]; + if (child is null) + continue; + + var childInfo = WalkAutomationElement(child, prefix, [.. path, index++], nativeObjects, depth + 1, maxDepth, isRoot: false); + if (childInfo is not null) + { + info.Children ??= []; + info.Children.Add(childInfo); + } + } + + return info; + } + + private static ElementInfo Map(AutomationElement element, AutomationElement.AutomationElementInformation current, string id, bool isRoot) + { + var controlType = current.ControlType; + var type = controlType?.LocalizedControlType ?? "element"; + var traits = new List(); + if (isRoot) + traits.Add("dialog"); + if (HasActionPattern(element)) + { + traits.Add("actionable"); + traits.Add("interactive"); + } + + if (CanFocus(current, element)) + traits.Add("focusable"); + if (CanScroll(element)) + traits.Add("scrollable"); + + var properties = new Dictionary + { + ["controlType"] = controlType?.ProgrammaticName, + ["className"] = string.IsNullOrWhiteSpace(current.ClassName) ? null : current.ClassName, + ["nativeWindowHandle"] = current.NativeWindowHandle == 0 ? null : $"0x{current.NativeWindowHandle:X}", + ["processId"] = current.ProcessId.ToString(), + ["isModal"] = TryGetIsModal(element)?.ToString(), + ["framework"] = string.IsNullOrWhiteSpace(current.FrameworkId) ? null : current.FrameworkId, + ["isOffscreen"] = current.IsOffscreen.ToString(), + ["hasKeyboardFocus"] = current.HasKeyboardFocus.ToString() + }; + + return new ElementInfo + { + Id = id, + Framework = "windows-native", + Type = NormalizeType(type), + FullType = controlType?.ProgrammaticName ?? string.Empty, + AutomationId = string.IsNullOrWhiteSpace(current.AutomationId) ? null : current.AutomationId, + Text = string.IsNullOrWhiteSpace(current.Name) ? null : current.Name, + Value = TryGetValue(element), + Role = controlType?.LocalizedControlType, + Traits = traits.Count > 0 ? traits : null, + Bounds = MapBounds(current.BoundingRectangle), + WindowBounds = MapBounds(current.BoundingRectangle), + IsVisible = !current.IsOffscreen, + IsEnabled = current.IsEnabled, + IsFocused = current.HasKeyboardFocus, + NativeType = controlType?.ProgrammaticName, + NativeProperties = properties + }; + } + + private static bool CanFocus(AutomationElement.AutomationElementInformation current, AutomationElement element) + { + if (current.IsKeyboardFocusable) + return true; + + return HasActionPattern(element) || TryGetValue(element) is not null; + } + + private static BoundsInfo? MapBounds(System.Windows.Rect rect) + { + if (rect.IsEmpty || double.IsInfinity(rect.Width) || double.IsInfinity(rect.Height)) + return null; + + return new BoundsInfo + { + X = rect.X, + Y = rect.Y, + Width = Math.Max(0, rect.Width), + Height = Math.Max(0, rect.Height) + }; + } + + private static bool? TryGetIsModal(AutomationElement element) + { + try + { + if (element.TryGetCurrentPattern(WindowPattern.Pattern, out var pattern) && pattern is WindowPattern windowPattern) + return windowPattern.Current.IsModal; + } + catch (Exception ex) when (ex is ElementNotAvailableException or InvalidOperationException or COMException) + { + } + + return null; + } + + private static string? TryGetValue(AutomationElement element) + { + try + { + if (element.TryGetCurrentPattern(ValuePattern.Pattern, out var pattern) && pattern is ValuePattern valuePattern) + return valuePattern.Current.Value; + } + catch (Exception ex) when (ex is ElementNotAvailableException or InvalidOperationException or COMException) + { + } + + return null; + } + + private static bool HasActionPattern(AutomationElement element) + { + try + { + return element.TryGetCurrentPattern(InvokePattern.Pattern, out _) || + element.TryGetCurrentPattern(TogglePattern.Pattern, out _) || + element.TryGetCurrentPattern(SelectionItemPattern.Pattern, out _) || + element.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out _); + } + catch (Exception ex) when (ex is ElementNotAvailableException or InvalidOperationException or COMException) + { + return false; + } + } + + private static bool CanScroll(AutomationElement element) + { + try + { + return element.TryGetCurrentPattern(ScrollPattern.Pattern, out _) || + element.TryGetCurrentPattern(ScrollItemPattern.Pattern, out _); + } + catch (Exception ex) when (ex is ElementNotAvailableException or InvalidOperationException or COMException) + { + return false; + } + } + + private static string BuildId(AutomationElement.AutomationElementInformation current, string prefix, IReadOnlyList path) + { + var stable = !string.IsNullOrWhiteSpace(current.AutomationId) + ? $"automation:{Sanitize(current.AutomationId)}" + : !string.IsNullOrWhiteSpace(current.Name) + ? $"name:{Sanitize(current.Name)}" + : $"path:{string.Join(".", path)}"; + return $"{prefix}:{stable}"; + } + + private static string Sanitize(string value) + => string.Concat(value.Select(ch => char.IsLetterOrDigit(ch) || ch is '-' or '_' or ':' ? ch : '_')); + + private static string NormalizeType(string raw) + { + if (string.IsNullOrWhiteSpace(raw)) + return "Element"; + + return string.Concat(raw.Split(' ', StringSplitOptions.RemoveEmptyEntries) + .Select(part => char.ToUpperInvariant(part[0]) + part[1..])); + } + + private static string NormalizeLabel(string label) + => label + .Trim() + .Replace('\u2019', '\'') + .Replace("&", string.Empty) + .Replace("_", string.Empty) + .ToUpperInvariant(); +} +#endif diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverBase.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverBase.cs index f577c1073..d4028b9db 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverBase.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverBase.cs @@ -34,13 +34,13 @@ public Task> GetTreeAsync(int maxDepth = 0) public Task> QueryAsync(string? type = null, string? automationId = null, string? text = null) => EnsureClient().QueryAsync(type, automationId, text); - public Task TapAsync(string elementId) + public virtual Task TapAsync(string elementId) => EnsureClient().TapAsync(elementId); - public Task FillAsync(string elementId, string text) + public virtual Task FillAsync(string elementId, string text) => EnsureClient().FillAsync(elementId, text); - public Task ClearAsync(string elementId) + public virtual Task ClearAsync(string elementId) => EnsureClient().ClearAsync(elementId); public virtual Task ScreenshotAsync() diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs index d4a947c28..999c29bbe 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs @@ -12,7 +12,7 @@ public static IAppDriver Create(string platform) "maccatalyst" or "mac" or "catalyst" => new MacCatalystAppDriver(), "android" => new AndroidAppDriver(), "ios" or "iossimulator" => new iOSSimulatorAppDriver(), - "windows" or "win" or "winui" => new WindowsAppDriver(), + "windows" or "win" or "winui" or "wpf" => new WindowsAppDriver(), "linux" or "gtk" => new LinuxAppDriver(), _ => throw new ArgumentException($"Unknown platform: {platform}. Supported: maccatalyst, android, ios, windows, linux") }; diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/Windows/UIAutomationInterop.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/Windows/UIAutomationInterop.cs index a2f78447b..480526ab8 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/Windows/UIAutomationInterop.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/Windows/UIAutomationInterop.cs @@ -14,8 +14,16 @@ internal static class UIAutomationInterop { #if WINDOWS_BUILD private const int UIA_InvokePatternId = 10000; + private const int UIA_ValuePatternId = 10002; + private const int UIA_ScrollPatternId = 10004; + private const int UIA_ScrollItemPatternId = 10017; + + private const int UIA_ProcessIdPropertyId = 30002; + private const int UIA_ControlTypePropertyId = 30003; + private const int UIA_WindowControlTypeId = 50032; private const int UIA_ButtonControlTypeId = 50000; + private const int UIA_EditControlTypeId = 50004; private const int UIA_TextControlTypeId = 50020; private static CUIAutomationClass? _automation; @@ -26,10 +34,6 @@ private static CUIAutomationClass GetAutomation() return _automation; } - // ────────────────────────────────────────────── - // Property helpers - // ────────────────────────────────────────────── - public static string? GetName(IUIAutomationElement element) { try { return element.CurrentName; } catch { return null; } @@ -50,6 +54,53 @@ public static int GetControlType(IUIAutomationElement element) try { return element.CurrentAutomationId; } catch { return null; } } + public static int GetProcessId(IUIAutomationElement element) + { + try { return element.CurrentProcessId; } catch { return 0; } + } + + public static IntPtr GetNativeWindowHandle(IUIAutomationElement element) + { + try + { + var handle = element.CurrentNativeWindowHandle; + return handle == 0 ? IntPtr.Zero : new IntPtr(handle); + } + catch { return IntPtr.Zero; } + } + + public static UiaRect? GetBoundingRectangle(IUIAutomationElement element) + { + try + { + var rect = element.CurrentBoundingRectangle; + var width = Math.Max(0, rect.right - rect.left); + var height = Math.Max(0, rect.bottom - rect.top); + return new UiaRect(rect.left, rect.top, width, height); + } + catch { return null; } + } + + public static bool IsEnabled(IUIAutomationElement element) + { + try { return element.CurrentIsEnabled != 0; } catch { return false; } + } + + public static bool IsOffscreen(IUIAutomationElement element) + { + try { return element.CurrentIsOffscreen != 0; } catch { return false; } + } + + public static bool SetFocus(IUIAutomationElement element) + { + try + { + element.SetFocus(); + return true; + } + catch { return false; } + } + public static bool InvokeElement(IUIAutomationElement element) { try @@ -61,15 +112,56 @@ public static bool InvokeElement(IUIAutomationElement element) catch { return false; } } - // ────────────────────────────────────────────── - // Search helpers - // ────────────────────────────────────────────── + public static bool SetValue(IUIAutomationElement element, string value) + { + try + { + var pattern = (IUIAutomationValuePattern)element.GetCurrentPattern(UIA_ValuePatternId); + if (pattern.CurrentIsReadOnly != 0) + return false; + + pattern.SetValue(value); + return true; + } + catch { return false; } + } + + public static bool ScrollIntoView(IUIAutomationElement element) + { + try + { + var pattern = (IUIAutomationScrollItemPattern)element.GetCurrentPattern(UIA_ScrollItemPatternId); + pattern.ScrollIntoView(); + return true; + } + catch { return false; } + } + + public static bool ScrollElement(IUIAutomationElement element, ScrollAmount horizontalAmount, ScrollAmount verticalAmount) + { + try + { + var pattern = (IUIAutomationScrollPattern)element.GetCurrentPattern(UIA_ScrollPatternId); + pattern.Scroll(horizontalAmount, verticalAmount); + return true; + } + catch { return false; } + } + + public static IUIAutomationElement? ElementFromHandle(IntPtr hwnd) + { + if (hwnd == IntPtr.Zero) + return null; + + try { return GetAutomation().ElementFromHandle(hwnd); } + catch { return null; } + } public static List FindWindowsByProcessId(int processId) { var uia = GetAutomation(); var root = uia.GetRootElement(); - var condition = uia.CreatePropertyCondition(30002, processId); // UIA_ProcessIdPropertyId + var condition = uia.CreatePropertyCondition(UIA_ProcessIdPropertyId, processId); var results = new List(); try @@ -84,10 +176,40 @@ public static List FindWindowsByProcessId(int processId) return results; } + public static List FindWindowsByTitle(string title, bool exact = false) + { + var results = new List(); + if (string.IsNullOrWhiteSpace(title)) + return results; + + foreach (var window in FindTopLevelWindows()) + { + var name = GetName(window); + if (name is null) + continue; + + var matches = exact + ? name.Equals(title, StringComparison.OrdinalIgnoreCase) + : name.Contains(title, StringComparison.OrdinalIgnoreCase); + if (matches) + results.Add(window); + } + + return results; + } + + public static List FindTopLevelWindows() + { + var uia = GetAutomation(); + var root = uia.GetRootElement(); + var condition = uia.CreatePropertyCondition(UIA_ControlTypePropertyId, UIA_WindowControlTypeId); + return FindAll(root, TreeScope.TreeScope_Children, condition); + } + public static List<(IUIAutomationElement element, string name)> FindButtons(IUIAutomationElement root) { var uia = GetAutomation(); - var condition = uia.CreatePropertyCondition(30003, UIA_ButtonControlTypeId); // UIA_ControlTypePropertyId + var condition = uia.CreatePropertyCondition(UIA_ControlTypePropertyId, UIA_ButtonControlTypeId); var results = new List<(IUIAutomationElement, string)>(); try @@ -107,10 +229,21 @@ public static List FindWindowsByProcessId(int processId) return results; } + public static List<(IUIAutomationElement element, string name)> FindNamedButtons(IUIAutomationElement root, IReadOnlySet names) + { + var buttons = FindButtons(root); + if (names.Count == 0) + return buttons; + + return buttons + .Where(b => names.Contains(NormalizeLabel(b.name))) + .ToList(); + } + public static List FindTexts(IUIAutomationElement root) { var uia = GetAutomation(); - var condition = uia.CreatePropertyCondition(30003, UIA_TextControlTypeId); + var condition = uia.CreatePropertyCondition(UIA_ControlTypePropertyId, UIA_TextControlTypeId); var texts = new List(); try @@ -129,27 +262,47 @@ public static List FindTexts(IUIAutomationElement root) return texts; } + public static List FindEdits(IUIAutomationElement root) + { + var uia = GetAutomation(); + var condition = uia.CreatePropertyCondition(UIA_ControlTypePropertyId, UIA_EditControlTypeId); + return FindAll(root, TreeScope.TreeScope_Descendants, condition); + } + public static List FindChildWindows(IUIAutomationElement parent) { var uia = GetAutomation(); - var condition = uia.CreatePropertyCondition(30003, UIA_WindowControlTypeId); - var results = new List(); + var condition = uia.CreatePropertyCondition(UIA_ControlTypePropertyId, UIA_WindowControlTypeId); + return FindAll(parent, TreeScope.TreeScope_Descendants, condition); + } - try + public static IUIAutomationElement? FindFirstByAutomationIdOrName(IEnumerable roots, string idOrName) + { + if (string.IsNullOrWhiteSpace(idOrName)) + return null; + + foreach (var root in roots) { - var array = parent.FindAll(TreeScope.TreeScope_Descendants, condition); - if (array != null) - for (int i = 0; i < array.Length; i++) - results.Add(array.GetElement(i)); + if (MatchesIdOrName(root, idOrName)) + return root; + + foreach (var element in FindAll(root, TreeScope.TreeScope_Descendants, GetAutomation().CreateTrueCondition())) + { + if (MatchesIdOrName(element, idOrName)) + return element; + } } - catch { } - return results; + return null; } - // ────────────────────────────────────────────── - // Tree dump - // ────────────────────────────────────────────── + public static List FindAllByProcessId(int processId) + { + var uia = GetAutomation(); + var root = uia.GetRootElement(); + var condition = uia.CreatePropertyCondition(UIA_ProcessIdPropertyId, processId); + return FindAll(root, TreeScope.TreeScope_Descendants, condition); + } public static string DumpTree(IUIAutomationElement element, int maxDepth = 8) { @@ -169,10 +322,12 @@ private static void DumpTreeRecursive(IUIAutomationTreeWalker walker, IUIAutomat var controlType = GetLocalizedControlType(element) ?? "?"; var name = GetName(element) ?? ""; var automationId = GetAutomationId(element) ?? ""; + var bounds = GetBoundingRectangle(element); sb.Append(indent).Append(controlType); if (name.Length > 0) sb.Append($" \"{name}\""); if (automationId.Length > 0) sb.Append($" [{automationId}]"); + if (bounds is not null) sb.Append($" ({bounds.Value.X},{bounds.Value.Y},{bounds.Value.Width}x{bounds.Value.Height})"); sb.AppendLine(); var child = walker.GetFirstChildElement(element); @@ -184,5 +339,41 @@ private static void DumpTreeRecursive(IUIAutomationTreeWalker walker, IUIAutomat } catch { } } + + private static List FindAll(IUIAutomationElement root, TreeScope scope, IUIAutomationCondition condition) + { + var results = new List(); + + try + { + var array = root.FindAll(scope, condition); + if (array != null) + for (int i = 0; i < array.Length; i++) + results.Add(array.GetElement(i)); + } + catch { } + + return results; + } + + private static bool MatchesIdOrName(IUIAutomationElement element, string idOrName) + { + var automationId = GetAutomationId(element); + if (automationId?.Equals(idOrName, StringComparison.OrdinalIgnoreCase) == true) + return true; + + var name = GetName(element); + return name?.Equals(idOrName, StringComparison.OrdinalIgnoreCase) == true; + } + + public static string NormalizeLabel(string label) + => label + .Trim() + .Replace('\u2019', '\'') + .Replace("&", string.Empty) + .Replace("_", string.Empty) + .ToUpperInvariant(); + + public readonly record struct UiaRect(double X, double Y, double Width, double Height); #endif } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs index e993ec989..8826163d6 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs @@ -3,6 +3,7 @@ #if WINDOWS_BUILD using Interop.UIAutomationClient; using Microsoft.Maui.DevFlow.Driver.Windows; +using SkiaSharp; #endif namespace Microsoft.Maui.DevFlow.Driver; @@ -11,17 +12,6 @@ namespace Microsoft.Maui.DevFlow.Driver; /// Driver for Windows MAUI apps (WinUI3). /// Direct localhost connection, no special setup needed. /// Uses Windows UI Automation (UIA) via COM interop to detect and dismiss native dialogs. -/// -/// Detection strategy: -/// 1. Find all top-level windows for the target process. -/// 2. Walk each window's subtree looking for dialog-like patterns: -/// buttons + text elements clustered together. -/// 3. WinUI3 MAUI dialogs (DisplayAlert) render as modal overlays inside the main window, -/// so we scan all descendants, not just separate dialog windows. -/// -/// Button label matching: -/// - Uses UIA Name property on Button control type elements. -/// - Case-insensitive comparison. /// public class WindowsAppDriver : AppDriverBase { @@ -29,10 +19,111 @@ public class WindowsAppDriver : AppDriverBase public int? ProcessId { get; set; } public string? AppName { get; set; } + public string? WindowTitle { get; set; } + public IntPtr WindowHandle { get; set; } - // ────────────────────────────────────────────── - // Key simulation via SendInput P/Invoke - // ────────────────────────────────────────────── +#if WINDOWS_BUILD + public override async Task TapAsync(string elementId) + { + if (await TryAgentActionAsync(() => base.TapAsync(elementId))) + return true; + + EnsureWindows(); + var element = FindTargetElement(elementId); + if (element is null) + return false; + + UIAutomationInterop.ScrollIntoView(element); + return UIAutomationInterop.InvokeElement(element) || ClickElementCenter(element); + } + + public override async Task FillAsync(string elementId, string text) + { + if (await TryAgentActionAsync(() => base.FillAsync(elementId, text))) + return true; + + EnsureWindows(); + var element = FindTargetElement(elementId); + if (element is null) + return false; + + UIAutomationInterop.ScrollIntoView(element); + return UIAutomationInterop.SetValue(element, text); + } + + public override async Task ClearAsync(string elementId) + { + if (await TryAgentActionAsync(() => base.ClearAsync(elementId))) + return true; + + EnsureWindows(); + var element = FindTargetElement(elementId); + return element is not null && UIAutomationInterop.SetValue(element, string.Empty); + } + + public override async Task ScreenshotAsync() + { + if (Client is not null) + { + try + { + var data = await base.ScreenshotAsync(); + if (data is { Length: > 0 }) + return data; + } + catch { } + } + + EnsureWindows(); + return TryCaptureTargetScreenshot(); + } + + public async Task FocusAsync(string elementId) + { + if (Client is not null) + { + try + { + if (await Client.FocusAsync(elementId)) + return true; + } + catch { } + } + + EnsureWindows(); + var element = FindTargetElement(elementId); + return element is not null && UIAutomationInterop.SetFocus(element); + } + + public Task ScrollIntoViewAsync(string elementId) + { + EnsureWindows(); + var element = FindTargetElement(elementId); + return Task.FromResult(element is not null && UIAutomationInterop.ScrollIntoView(element)); + } + + public async Task ScrollAsync(string? elementId = null, double deltaX = 0, double deltaY = 0) + { + if (Client is not null) + { + try + { + if (await Client.ScrollAsync(elementId, deltaX, deltaY)) + return true; + } + catch { } + } + + EnsureWindows(); + var element = elementId is null + ? ResolveTargetWindows(throwIfMissing: false).FirstOrDefault() + : FindTargetElement(elementId); + if (element is null) + return false; + + return UIAutomationInterop.ScrollElement(element, ToScrollAmount(deltaX), ToScrollAmount(deltaY)); + } +#endif #if WINDOWS_BUILD public override Task BackAsync() => PressKeyAsync("ESCAPE"); @@ -49,10 +140,6 @@ public override Task PressKeyAsync(string key) public override Task PressKeyAsync(string key) => throw new PlatformNotSupportedException("Windows operations require Windows."); #endif - // ────────────────────────────────────────────── - // Screen Recording via ffmpeg - // ────────────────────────────────────────────── - public override async Task StartRecordingAsync(string outputFile, int timeoutSeconds = 30) { EnsureNotRecording(); @@ -62,12 +149,7 @@ public override async Task StartRecordingAsync(string outputFile, int timeoutSec if (!fullPath.EndsWith(".mp4", StringComparison.OrdinalIgnoreCase)) fullPath = Path.ChangeExtension(fullPath, ".mp4"); - var input = "desktop"; - if (!string.IsNullOrEmpty(AppName)) - input = $"title={AppName}"; - - var psi = new ProcessStartInfo("ffmpeg", - $"-f gdigrab -framerate 30 -t {timeoutSeconds} -i {input} -y \"{fullPath}\"") + var psi = new ProcessStartInfo("ffmpeg") { RedirectStandardInput = true, RedirectStandardOutput = true, @@ -75,6 +157,16 @@ public override async Task StartRecordingAsync(string outputFile, int timeoutSec UseShellExecute = false, CreateNoWindow = true }; + psi.ArgumentList.Add("-f"); + psi.ArgumentList.Add("gdigrab"); + psi.ArgumentList.Add("-framerate"); + psi.ArgumentList.Add("30"); + psi.ArgumentList.Add("-t"); + psi.ArgumentList.Add(timeoutSeconds.ToString()); + psi.ArgumentList.Add("-i"); + psi.ArgumentList.Add(ResolveGdiGrabInput()); + psi.ArgumentList.Add("-y"); + psi.ArgumentList.Add(fullPath); var process = Process.Start(psi) ?? throw new InvalidOperationException("Failed to start ffmpeg"); @@ -99,7 +191,6 @@ public override async Task StopRecordingAsync() if (state.Platform != "windows") throw new InvalidOperationException($"Active recording is on {state.Platform}, not Windows."); - // Send 'q' to ffmpeg's stdin for graceful stop try { var proc = Process.GetProcessById(state.RecordingPid); @@ -143,28 +234,24 @@ private static void EnsureFfmpeg() } } - // ────────────────────────────────────────────── - // Dialog detection & dismissal via UIA - // ────────────────────────────────────────────── - #if WINDOWS_BUILD public Task DetectAlertAsync() { EnsureWindows(); - var pid = ResolveProcessId(); - return Task.FromResult(DetectDialog(pid)); + var windows = ResolveTargetWindows(); + return Task.FromResult(DetectDialog(windows)); } public Task DismissAlertAsync(string? buttonLabel = null) { EnsureWindows(); - var pid = ResolveProcessId(); - var buttons = FindDialogButtonsCore(pid); + var windows = ResolveTargetWindows(); + var buttons = FindDialogButtonsCore(windows); if (buttons.Count == 0) throw new InvalidOperationException("No alert detected to dismiss."); var target = PickButton(buttons, buttonLabel); - if (!UIAutomationInterop.InvokeElement(target.element)) + if (!UIAutomationInterop.InvokeElement(target.element) && !ClickElementCenter(target.element)) throw new InvalidOperationException("UIA Invoke action failed."); return Task.CompletedTask; @@ -173,17 +260,18 @@ public Task DismissAlertAsync(string? buttonLabel = null) public Task HandleAlertIfPresentAsync(string? buttonLabel = null) { EnsureWindows(); - var pid = ResolveProcessId(); - var buttons = FindDialogButtonsCore(pid); + var windows = ResolveTargetWindows(); + var buttons = FindDialogButtonsCore(windows); if (buttons.Count == 0) return Task.FromResult(null); - var alertButtons = buttons.Select(b => new AlertButton(b.name, 0, 0, 0, 0)).ToList(); - var texts = FindDialogTextsCore(pid); + var alertButtons = buttons.Select(ToAlertButton).ToList(); + var texts = FindDialogTextsCore(windows); var info = new AlertInfo(texts.FirstOrDefault(), alertButtons); var target = PickButton(buttons, buttonLabel); - UIAutomationInterop.InvokeElement(target.element); + if (!UIAutomationInterop.InvokeElement(target.element)) + ClickElementCenter(target.element); return Task.FromResult(info); } @@ -191,72 +279,83 @@ public Task DismissAlertAsync(string? buttonLabel = null) public Task GetAccessibilityTreeAsync() { EnsureWindows(); - var pid = ResolveProcessId(); - var windows = UIAutomationInterop.FindWindowsByProcessId(pid); + var windows = ResolveTargetWindows(); var result = string.Empty; foreach (var window in windows) result += UIAutomationInterop.DumpTree(window); return Task.FromResult(result); } - // ────────────────────────────────────────────── - // Core detection logic - // ────────────────────────────────────────────── - - private static AlertInfo? DetectDialog(int pid) + private static AlertInfo? DetectDialog(IReadOnlyList windows) { - var buttons = FindDialogButtonsCore(pid); + var buttons = FindDialogButtonsCore(windows); if (buttons.Count == 0) return null; - var alertButtons = buttons.Select(b => new AlertButton(b.name, 0, 0, 0, 0)).ToList(); - var texts = FindDialogTextsCore(pid); + var alertButtons = buttons.Select(ToAlertButton).ToList(); + var texts = FindDialogTextsCore(windows); return new AlertInfo(texts.FirstOrDefault(), alertButtons); } - private static List<(IUIAutomationElement element, string name)> FindDialogButtonsCore(int pid) + private static List<(IUIAutomationElement element, string name)> FindDialogButtonsCore(IReadOnlyList windows) { - var windows = UIAutomationInterop.FindWindowsByProcessId(pid); + var candidate = FindDialogCandidate(windows); + return candidate?.Buttons ?? new(); + } + + private static List FindDialogTextsCore(IReadOnlyList windows) + { + var candidate = FindDialogCandidate(windows); + return candidate?.Texts ?? new(); + } + private static DialogCandidate? FindDialogCandidate(IReadOnlyList windows) + { foreach (var window in windows) { var childWindows = UIAutomationInterop.FindChildWindows(window); - foreach (var childWin in childWindows) + foreach (var childWindow in childWindows) { - var buttons = UIAutomationInterop.FindButtons(childWin); + var buttons = UIAutomationInterop.FindButtons(childWindow); if (buttons.Count > 0) { - var texts = UIAutomationInterop.FindTexts(childWin); + var texts = UIAutomationInterop.FindTexts(childWindow); if (texts.Count > 0) - return buttons; + return new DialogCandidate(buttons, texts); } } } - return new(); - } - - private static List FindDialogTextsCore(int pid) - { - var windows = UIAutomationInterop.FindWindowsByProcessId(pid); - foreach (var window in windows) { - var childWindows = UIAutomationInterop.FindChildWindows(window); - foreach (var childWin in childWindows) + var buttons = UIAutomationInterop.FindNamedButtons(window, CommonDialogButtonLabels); + if (buttons.Count > 0) { - var buttons = UIAutomationInterop.FindButtons(childWin); - if (buttons.Count > 0) - return UIAutomationInterop.FindTexts(childWin); + var texts = UIAutomationInterop.FindTexts(window); + if (texts.Count > 0) + return new DialogCandidate(buttons, texts); } } - return new(); + return null; + } + + private static AlertButton ToAlertButton((IUIAutomationElement element, string name) button) + { + var rect = UIAutomationInterop.GetBoundingRectangle(button.element); + return rect is null + ? new AlertButton(button.name, 0, 0, 0, 0) + : new AlertButton(button.name, rect.Value.X, rect.Value.Y, rect.Value.Width, rect.Value.Height); } - // ────────────────────────────────────────────── - // Button matching - // ────────────────────────────────────────────── + private sealed record DialogCandidate( + List<(IUIAutomationElement element, string name)> Buttons, + List Texts); + + private static readonly HashSet CommonDialogButtonLabels = new(StringComparer.OrdinalIgnoreCase) + { + "OK", "CANCEL", "YES", "NO", "CLOSE", "DISMISS", "RETRY", "ABORT", "IGNORE", "CONTINUE", "ALLOW", "DON'T ALLOW" + }; private static (IUIAutomationElement element, string name) PickButton( List<(IUIAutomationElement element, string name)> buttons, string? buttonLabel) @@ -266,13 +365,14 @@ private static (IUIAutomationElement element, string name) PickButton( if (buttonLabel is not null) { - var match = buttons.FirstOrDefault(b => - b.name.Equals(buttonLabel, StringComparison.OrdinalIgnoreCase)); + var normalized = UIAutomationInterop.NormalizeLabel(buttonLabel); + var match = buttons.FirstOrDefault(b => UIAutomationInterop.NormalizeLabel(b.name) == normalized); if (match.element is null) { var available = string.Join(", ", buttons.Select(b => b.name)); throw new InvalidOperationException($"Button '{buttonLabel}' not found. Available: {available}"); } + return match; } @@ -285,15 +385,34 @@ private static (IUIAutomationElement element, string name) PickButton( public Task GetAccessibilityTreeAsync() => throw new PlatformNotSupportedException("Windows operations require Windows."); #endif - // ────────────────────────────────────────────── - // Process resolution - // ────────────────────────────────────────────── - private int ResolveProcessId() { if (ProcessId.HasValue) return ProcessId.Value; +#if WINDOWS_BUILD + if (WindowHandle != IntPtr.Zero) + { + var pid = GetWindowProcessId(WindowHandle); + if (pid > 0) + { + ProcessId = pid; + return pid; + } + } + + if (!string.IsNullOrWhiteSpace(WindowTitle)) + { + var window = UIAutomationInterop.FindWindowsByTitle(WindowTitle).FirstOrDefault(); + var pid = window is null ? 0 : UIAutomationInterop.GetProcessId(window); + if (pid > 0) + { + ProcessId = pid; + return pid; + } + } +#endif + if (!string.IsNullOrEmpty(AppName)) { var processes = Process.GetProcessesByName(AppName); @@ -306,7 +425,11 @@ private int ResolveProcessId() var all = Process.GetProcesses(); var match = all.FirstOrDefault(p => { - try { return p.ProcessName.Contains(AppName, StringComparison.OrdinalIgnoreCase); } + try + { + return p.ProcessName.Contains(AppName, StringComparison.OrdinalIgnoreCase) + || p.MainWindowTitle.Contains(AppName, StringComparison.OrdinalIgnoreCase); + } catch { return false; } }); if (match != null) @@ -316,14 +439,290 @@ private int ResolveProcessId() } } - throw new InvalidOperationException("ProcessId or AppName must be set for Windows operations."); + throw new InvalidOperationException("ProcessId, WindowHandle, WindowTitle, or AppName must be set for Windows operations."); } - // ────────────────────────────────────────────── - // Key simulation - // ────────────────────────────────────────────── + private string ResolveGdiGrabInput() + { + var title = WindowTitle; +#if WINDOWS_BUILD + title ??= TryResolveWindowTitle(); +#endif + title ??= AppName; + + return string.IsNullOrWhiteSpace(title) ? "desktop" : $"title={title}"; + } #if WINDOWS_BUILD + private async Task TryAgentActionAsync(Func> action) + { + if (Client is null) + return false; + + try { return await action(); } + catch { return false; } + } + + private static ScrollAmount ToScrollAmount(double delta) + { + if (delta > 0) + return ScrollAmount.ScrollAmount_LargeIncrement; + if (delta < 0) + return ScrollAmount.ScrollAmount_LargeDecrement; + return ScrollAmount.ScrollAmount_NoAmount; + } + + private IUIAutomationElement? FindTargetElement(string idOrName) + { + var windows = ResolveTargetWindows(throwIfMissing: false); + return windows.Count == 0 + ? null + : UIAutomationInterop.FindFirstByAutomationIdOrName(windows, idOrName); + } + + private List ResolveTargetWindows(bool throwIfMissing = true) + { + var windows = new List(); + + if (WindowHandle != IntPtr.Zero) + { + var window = UIAutomationInterop.ElementFromHandle(WindowHandle); + if (window is not null) + windows.Add(window); + } + + if (ProcessId.HasValue) + windows.AddRange(UIAutomationInterop.FindWindowsByProcessId(ProcessId.Value)); + else if (!string.IsNullOrWhiteSpace(AppName)) + { + try + { + var pid = ResolveProcessId(); + windows.AddRange(UIAutomationInterop.FindWindowsByProcessId(pid)); + } + catch { } + } + + if (!string.IsNullOrWhiteSpace(WindowTitle)) + { + var titleMatches = FilterWindowsByTitle(windows, WindowTitle); + if (titleMatches.Count > 0) + windows = titleMatches; + else + windows.AddRange(UIAutomationInterop.FindWindowsByTitle(WindowTitle)); + } + + windows = DeduplicateWindows(windows); + if (windows.Count == 0 && throwIfMissing) + throw new InvalidOperationException("No Windows UIAutomation windows found for the configured ProcessId, WindowHandle, WindowTitle, or AppName."); + + return windows; + } + + private static List FilterWindowsByTitle(IEnumerable windows, string title) + { + return windows + .Where(w => UIAutomationInterop.GetName(w)?.Contains(title, StringComparison.OrdinalIgnoreCase) == true) + .ToList(); + } + + private static List DeduplicateWindows(IEnumerable windows) + { + var results = new List(); + var seen = new HashSet(StringComparer.OrdinalIgnoreCase); + + foreach (var window in windows) + { + var handle = UIAutomationInterop.GetNativeWindowHandle(window); + var key = handle != IntPtr.Zero + ? $"hwnd:{handle.ToInt64()}" + : $"uia:{UIAutomationInterop.GetProcessId(window)}:{UIAutomationInterop.GetName(window)}"; + if (seen.Add(key)) + results.Add(window); + } + + return results; + } + + private string? TryResolveWindowTitle() + { + if (!string.IsNullOrWhiteSpace(WindowTitle)) + return WindowTitle; + + if (WindowHandle != IntPtr.Zero) + { + var window = UIAutomationInterop.ElementFromHandle(WindowHandle); + var name = window is null ? null : UIAutomationInterop.GetName(window); + if (!string.IsNullOrWhiteSpace(name)) + return name; + } + + foreach (var window in ResolveTargetWindows(throwIfMissing: false)) + { + var name = UIAutomationInterop.GetName(window); + if (!string.IsNullOrWhiteSpace(name)) + return name; + } + + try + { + var process = Process.GetProcessById(ResolveProcessId()); + if (!string.IsNullOrWhiteSpace(process.MainWindowTitle)) + return process.MainWindowTitle; + } + catch { } + + return null; + } + + private byte[]? TryCaptureTargetScreenshot() + { + try + { + return TryResolveTargetRectangle(out var rect) + ? CaptureScreenRectangle(rect) + : null; + } + catch { return null; } + } + + private bool TryResolveTargetRectangle(out CaptureRect rect) + { + if (WindowHandle != IntPtr.Zero && TryGetWindowRectangle(WindowHandle, out rect)) + return true; + + foreach (var window in ResolveTargetWindows(throwIfMissing: false)) + { + var handle = UIAutomationInterop.GetNativeWindowHandle(window); + if (handle != IntPtr.Zero && TryGetWindowRectangle(handle, out rect)) + return true; + + var bounds = UIAutomationInterop.GetBoundingRectangle(window); + if (bounds is { Width: > 0, Height: > 0 }) + { + rect = CaptureRect.FromBounds(bounds.Value); + return true; + } + } + + try + { + var process = Process.GetProcessById(ResolveProcessId()); + if (process.MainWindowHandle != IntPtr.Zero && TryGetWindowRectangle(process.MainWindowHandle, out rect)) + return true; + } + catch { } + + rect = default; + return false; + } + + private static bool TryGetWindowRectangle(IntPtr hwnd, out CaptureRect rect) + { + if (GetWindowRect(hwnd, out var nativeRect)) + { + rect = new CaptureRect( + nativeRect.Left, + nativeRect.Top, + Math.Max(0, nativeRect.Right - nativeRect.Left), + Math.Max(0, nativeRect.Bottom - nativeRect.Top)); + return rect.Width > 0 && rect.Height > 0; + } + + rect = default; + return false; + } + + private static byte[]? CaptureScreenRectangle(CaptureRect rect) + { + var screenDc = GetDC(IntPtr.Zero); + if (screenDc == IntPtr.Zero) + return null; + + var memoryDc = IntPtr.Zero; + var bitmap = IntPtr.Zero; + var previous = IntPtr.Zero; + + try + { + memoryDc = CreateCompatibleDC(screenDc); + bitmap = CreateCompatibleBitmap(screenDc, rect.Width, rect.Height); + if (memoryDc == IntPtr.Zero || bitmap == IntPtr.Zero) + return null; + + previous = SelectObject(memoryDc, bitmap); + if (!BitBlt(memoryDc, 0, 0, rect.Width, rect.Height, screenDc, rect.X, rect.Y, SRCCOPY | CAPTUREBLT)) + return null; + + var bytes = new byte[rect.Width * rect.Height * 4]; + var info = new BITMAPINFO + { + bmiHeader = new BITMAPINFOHEADER + { + biSize = (uint)Marshal.SizeOf(), + biWidth = rect.Width, + biHeight = -rect.Height, + biPlanes = 1, + biBitCount = 32, + biCompression = BI_RGB, + biSizeImage = (uint)bytes.Length + } + }; + + var scanLines = GetDIBits(memoryDc, bitmap, 0, (uint)rect.Height, bytes, ref info, DIB_RGB_COLORS); + if (scanLines == 0) + return null; + + using var skBitmap = new SKBitmap(rect.Width, rect.Height, SKColorType.Bgra8888, SKAlphaType.Opaque); + Marshal.Copy(bytes, 0, skBitmap.GetPixels(), bytes.Length); + using var image = SKImage.FromBitmap(skBitmap); + using var png = image.Encode(SKEncodedImageFormat.Png, 100); + return png.ToArray(); + } + finally + { + if (previous != IntPtr.Zero && memoryDc != IntPtr.Zero) + SelectObject(memoryDc, previous); + if (bitmap != IntPtr.Zero) + DeleteObject(bitmap); + if (memoryDc != IntPtr.Zero) + DeleteDC(memoryDc); + ReleaseDC(IntPtr.Zero, screenDc); + } + } +#endif + +#if WINDOWS_BUILD + private static int GetWindowProcessId(IntPtr hwnd) + { + GetWindowThreadProcessId(hwnd, out var processId); + return (int)processId; + } + + private static bool ClickElementCenter(IUIAutomationElement element) + { + var rect = UIAutomationInterop.GetBoundingRectangle(element); + if (rect is not { Width: > 0, Height: > 0 }) + return false; + + var x = (int)Math.Round(rect.Value.X + rect.Value.Width / 2); + var y = (int)Math.Round(rect.Value.Y + rect.Value.Height / 2); + return ClickPoint(x, y); + } + + private static bool ClickPoint(int x, int y) + { + if (!SetCursorPos(x, y)) + return false; + + var inputs = new INPUT[] + { + new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTDOWN } } }, + new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTUP } } } + }; + return SendInput((uint)inputs.Length, inputs, Marshal.SizeOf()) == inputs.Length; + } + private static ushort MapKeyToVirtualKey(string key) => key.ToUpperInvariant() switch { "ENTER" or "RETURN" => 0x0D, @@ -343,6 +742,42 @@ private int ResolveProcessId() [DllImport("user32.dll", SetLastError = true)] private static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize); + [DllImport("user32.dll")] + private static extern bool SetCursorPos(int x, int y); + + [DllImport("user32.dll")] + private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint processId); + + [DllImport("user32.dll", SetLastError = true)] + private static extern bool GetWindowRect(IntPtr hWnd, out NativeRect lpRect); + + [DllImport("user32.dll")] + private static extern IntPtr GetDC(IntPtr hWnd); + + [DllImport("user32.dll")] + private static extern int ReleaseDC(IntPtr hWnd, IntPtr hDC); + + [DllImport("gdi32.dll")] + private static extern IntPtr CreateCompatibleDC(IntPtr hdc); + + [DllImport("gdi32.dll")] + private static extern IntPtr CreateCompatibleBitmap(IntPtr hdc, int cx, int cy); + + [DllImport("gdi32.dll")] + private static extern IntPtr SelectObject(IntPtr hdc, IntPtr h); + + [DllImport("gdi32.dll")] + private static extern bool BitBlt(IntPtr hdc, int x, int y, int cx, int cy, IntPtr hdcSrc, int x1, int y1, int rop); + + [DllImport("gdi32.dll")] + private static extern int GetDIBits(IntPtr hdc, IntPtr hbm, uint start, uint cLines, byte[] lpvBits, ref BITMAPINFO lpbmi, uint usage); + + [DllImport("gdi32.dll")] + private static extern bool DeleteObject(IntPtr ho); + + [DllImport("gdi32.dll")] + private static extern bool DeleteDC(IntPtr hdc); + [StructLayout(LayoutKind.Sequential)] private struct INPUT { @@ -354,6 +789,7 @@ private struct INPUT private struct INPUTUNION { [FieldOffset(0)] public KEYBDINPUT ki; + [FieldOffset(0)] public MOUSEINPUT mi; } [StructLayout(LayoutKind.Sequential)] @@ -366,8 +802,68 @@ private struct KEYBDINPUT public IntPtr dwExtraInfo; } + [StructLayout(LayoutKind.Sequential)] + private struct MOUSEINPUT + { + public int dx; + public int dy; + public uint mouseData; + public uint dwFlags; + public uint time; + public IntPtr dwExtraInfo; + } + + [StructLayout(LayoutKind.Sequential)] + private struct NativeRect + { + public int Left; + public int Top; + public int Right; + public int Bottom; + } + + [StructLayout(LayoutKind.Sequential)] + private struct BITMAPINFO + { + public BITMAPINFOHEADER bmiHeader; + public uint bmiColors; + } + + [StructLayout(LayoutKind.Sequential)] + private struct BITMAPINFOHEADER + { + public uint biSize; + public int biWidth; + public int biHeight; + public ushort biPlanes; + public ushort biBitCount; + public uint biCompression; + public uint biSizeImage; + public int biXPelsPerMeter; + public int biYPelsPerMeter; + public uint biClrUsed; + public uint biClrImportant; + } + + private readonly record struct CaptureRect(int X, int Y, int Width, int Height) + { + public static CaptureRect FromBounds(UIAutomationInterop.UiaRect bounds) + => new( + (int)Math.Floor(bounds.X), + (int)Math.Floor(bounds.Y), + (int)Math.Ceiling(bounds.Width), + (int)Math.Ceiling(bounds.Height)); + } + + private const uint INPUT_MOUSE = 0; private const uint INPUT_KEYBOARD = 1; private const uint KEYEVENTF_KEYUP = 0x0002; + private const uint MOUSEEVENTF_LEFTDOWN = 0x0002; + private const uint MOUSEEVENTF_LEFTUP = 0x0004; + private const int SRCCOPY = 0x00CC0020; + private const int CAPTUREBLT = 0x40000000; + private const uint BI_RGB = 0; + private const uint DIB_RGB_COLORS = 0; private static void SendKeyPress(ushort vk) { From 553c61804f167dfb51d0a7fe46342d01acc423c4 Mon Sep 17 00:00:00 2001 From: redth Date: Wed, 20 May 2026 11:38:42 -0400 Subject: [PATCH 2/5] Address PR review feedback for Windows dialog automation - NativeWindowProbe: replace unbounded FindAll(TreeScope.Descendants) in FindDialogCandidates with a bounded BFS over TreeScope.Children, capping total nodes scanned and walk depth to keep dialog discovery from dominating tree/query latency on large WinUI apps. - DevFlowAgentService.CaptureUiOrNativeAsync: share a CancellationTokenSource across the two probe timer races so surviving Task.Delay timers are cancelled promptly under high query throughput. - DevFlowAgentService.CaptureUiOrNativeAsync: bound the final native task await with NativeUiProbeTimeoutMs so a frozen app's UIA tree walk cannot block the HTTP request indefinitely. - DevFlowAgentService.CaptureUiOrNativeAsync: observe the abandoned uiTask on timeout so a later fault doesn't trigger UnobservedTaskException. - TryScheduleNativeTapFirst: document fire-and-forget semantics so callers understand the queued-vs-completed contract. - NativeWindowProbe: add TryBuildCachedElementInfo helper; use it from PlatformVisualTreeWalker and WpfVisualTreeWalker so GetNativeElementInfoById rebuilds the cached element instead of re-enumerating every same-process window on each native: id lookup. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../DevFlowAgentService.cs | 66 ++++++++++++++-- .../WpfVisualTreeWalker.cs | 14 +++- .../VisualTreeWalker.cs | 14 +++- .../Windows/NativeWindowProbe.cs | 79 +++++++++++++++---- 4 files changed, 151 insertions(+), 22 deletions(-) diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs index 911976076..b9de6a17e 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs @@ -778,6 +778,11 @@ private async Task> CaptureUiOrNativeAsync( return await DispatchAsync(uiCallback); var hwndSource = new TaskCompletionSource>(TaskCreationOptions.RunContinuationsAsynchronously); + // Shared CTS so the surviving Task.Delay timers can be cancelled once a race + // is decided. Without this, every CaptureUiOrNativeAsync call leaves up to + // two timers running for the full NativeUiProbeTimeoutMs window, which under + // automation throughput accumulates uncancelled timers per second. + using var probeCts = new CancellationTokenSource(); var uiTask = DispatchAsync(() => { try @@ -797,7 +802,17 @@ private async Task> CaptureUiOrNativeAsync( var nativeTask = Task.Run(async () => { - var winner = await Task.WhenAny(hwndSource.Task, Task.Delay(NativeUiProbeTimeoutMs)).ConfigureAwait(false); + Task delayTask; + try + { + delayTask = Task.Delay(NativeUiProbeTimeoutMs, probeCts.Token); + } + catch (OperationCanceledException) + { + delayTask = Task.CompletedTask; + } + + var winner = await Task.WhenAny(hwndSource.Task, delayTask).ConfigureAwait(false); var hwnds = winner == hwndSource.Task ? await hwndSource.Task.ConfigureAwait(false) : Array.Empty(); @@ -813,15 +828,47 @@ private async Task> CaptureUiOrNativeAsync( } }); - var uiWinner = await Task.WhenAny(uiTask, Task.Delay(NativeUiProbeTimeoutMs)).ConfigureAwait(false); + Task uiDelay; + try + { + uiDelay = Task.Delay(NativeUiProbeTimeoutMs, probeCts.Token); + } + catch (OperationCanceledException) + { + uiDelay = Task.CompletedTask; + } + + var uiWinner = await Task.WhenAny(uiTask, uiDelay).ConfigureAwait(false); if (uiWinner != uiTask) { hwndSource.TrySetResult(Array.Empty()); - return await nativeTask.ConfigureAwait(false); - } + // The UI dispatcher is blocked (the exact scenario this code path targets). + // Observe any later fault on the abandoned uiTask so it doesn't trigger + // TaskScheduler.UnobservedTaskException when it eventually completes. + _ = uiTask.ContinueWith( + t => System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Abandoned uiTask faulted: {t.Exception?.GetBaseException().Message}"), + CancellationToken.None, + TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously, + TaskScheduler.Default); + + // Also bound the native await: NativeUiProbeTimeoutMs only guards the HWND + // discovery wait inside nativeTask; nativeCallback itself (a UIA tree walk) + // is unbounded and can block for minutes on a frozen app. + var nativeWinner = await Task.WhenAny(nativeTask, Task.Delay(NativeUiProbeTimeoutMs, CancellationToken.None)).ConfigureAwait(false); + probeCts.Cancel(); + return nativeWinner == nativeTask + ? await nativeTask.ConfigureAwait(false) + : new List(); + } + + probeCts.Cancel(); var uiResult = await uiTask.ConfigureAwait(false); - var nativeResult = await nativeTask.ConfigureAwait(false); + // Bound the final native await for the same reason as above. + var finalNativeWinner = await Task.WhenAny(nativeTask, Task.Delay(NativeUiProbeTimeoutMs, CancellationToken.None)).ConfigureAwait(false); + var nativeResult = finalNativeWinner == nativeTask + ? await nativeTask.ConfigureAwait(false) + : new List(); if (nativeResult.Count == 0) return uiResult; @@ -1745,6 +1792,15 @@ protected virtual bool TryNativeTap(VisualElement ve) /// Allows platforms whose native click handlers may open synchronous modal loops to schedule /// a native tap before MAUI invokes the managed click event inline. /// + /// + /// When an override returns true, the native invocation is typically queued onto the + /// platform dispatcher (e.g. BeginInvoke/TryEnqueue) rather than completed + /// synchronously. The HTTP caller receives "ok" as soon as the work is queued, so any + /// follow-up command (a screenshot or another query) may race against the dialog actually + /// appearing. Tests should use maui_wait or explicit polling after a tap that is + /// expected to surface a dialog. Faults inside the dispatched invocation are silent from the + /// caller's perspective and only surface in the agent's debug output. + /// protected virtual bool TryScheduleNativeTapFirst(VisualElement ve) { return false; diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs index 3d9ececb1..711ba8400 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs @@ -209,8 +209,20 @@ public override List WalkNativeTree(IReadOnlyList knownWind } public override ElementInfo? GetNativeElementInfoById(string id) - => FlattenElementInfos(WalkNativeTree(Array.Empty())) + { + // Cache-first: avoid a full UIA tree walk (which calls EnumerateProcessTopLevels + // and enumerates every same-process window) when the requested id was already + // resolved by a recent tree/query call. + Dictionary cache; + lock (_nativeObjectsLock) + cache = _nativeObjects; + + if (NativeWindowProbe.TryBuildCachedElementInfo(cache, id) is { } cached) + return cached; + + return FlattenElementInfos(WalkNativeTree(Array.Empty())) .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); + } public override string TryNativeElementTap(string elementId) { diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs index 5db121bec..ee3316027 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs @@ -671,8 +671,20 @@ public override List WalkNativeTree(IReadOnlyList knownWind } public override ElementInfo? GetNativeElementInfoById(string id) - => FlattenElementInfos(WalkNativeTree(Array.Empty())) + { + // Cache-first: avoid a full UIA tree walk (which calls EnumerateProcessTopLevels + // and enumerates every same-process window) when the requested id was already + // resolved by a recent tree/query call. + Dictionary cache; + lock (_nativeObjectsLock) + cache = _nativeObjects; + + if (NativeWindowProbe.TryBuildCachedElementInfo(cache, id) is { } cached) + return cached; + + return FlattenElementInfos(WalkNativeTree(Array.Empty())) .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); + } public override string TryNativeElementTap(string elementId) { diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs index 9a83b0796..f5f00bed2 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs @@ -13,6 +13,11 @@ public sealed class NativeWindowProbe { private const int DefaultMaxDepth = 10; private const int MaxNodesPerWindow = 256; + // Bounds the descendant scan during dialog discovery to avoid runaway UIA tree + // walks on large WinUI apps (where TreeScope.Descendants can return thousands of + // nodes via cross-process COM marshaling). + private const int MaxDialogScanNodes = 512; + private const int MaxDialogScanDepth = 8; private static readonly int CurrentProcessId = Environment.ProcessId; private static readonly HashSet CommonDialogButtonLabels = new(StringComparer.OrdinalIgnoreCase) @@ -81,6 +86,30 @@ public void AppendForeignTopLevelWindows( public static AutomationElement? TryGetAutomationElement(IReadOnlyDictionary nativeObjects, string id) => nativeObjects.TryGetValue(id, out var native) && native is AutomationElement element ? element : null; + /// + /// Rebuilds an for a previously-cached + /// without performing a fresh process-wide window enumeration. Returns null when the cached + /// element is no longer available (e.g. dialog closed). + /// + public static ElementInfo? TryBuildCachedElementInfo( + IReadOnlyDictionary nativeObjects, + string id, + int? maxDepth = null) + { + if (TryGetAutomationElement(nativeObjects, id) is not { } element) + return null; + + var depth = maxDepth is > 0 ? maxDepth.Value : DefaultMaxDepth; + // Build a throwaway nativeObjects map so child walks don't leak into the + // caller's cache. The returned ElementInfo's root id is rewritten to match + // the supplied id so it round-trips with the request that produced it. + var scratch = new Dictionary(StringComparer.OrdinalIgnoreCase); + var info = WalkAutomationElement(element, id, [0], scratch, 0, depth, isRoot: true); + if (info is not null) + info.Id = id; + return info; + } + public static bool TryInvoke(AutomationElement element) { try @@ -233,28 +262,48 @@ private void AppendKnownWindowDialogSubtrees( private static IReadOnlyList FindDialogCandidates(AutomationElement root, IntPtr rootHwnd) { + // Walk the subtree breadth-first via TreeScope.Children rather than calling + // FindAll(TreeScope.Descendants) which eagerly materializes the entire UIA + // subtree (potentially thousands of cross-process COM marshalled nodes). + // We cap both total nodes visited and depth to keep dialog discovery bounded. var candidates = new List(); - AutomationElementCollection descendants; - try - { - descendants = root.FindAll(TreeScope.Descendants, System.Windows.Automation.Condition.TrueCondition); - } - catch (Exception ex) when (ex is ElementNotAvailableException or COMException) - { - return candidates; - } + var queue = new Queue<(AutomationElement Element, int Depth)>(); + queue.Enqueue((root, 0)); + var scanned = 0; - for (var i = 0; i < descendants.Count; i++) + while (queue.Count > 0 && scanned < MaxDialogScanNodes) { - var element = descendants[i]; - if (element is null || !IsDialogCandidate(element, rootHwnd)) + var (current, depth) = queue.Dequeue(); + scanned++; + + // Skip the root window itself - only its descendants are dialog candidates. + if (current != root && IsDialogCandidate(current, rootHwnd)) + { + // Once we've identified a dialog candidate we don't need to keep + // descending into its subtree. + candidates.Add(current); continue; + } - if (candidates.Any(existing => IsAncestor(existing, element))) + if (depth >= MaxDialogScanDepth) continue; - candidates.RemoveAll(existing => IsAncestor(element, existing)); - candidates.Add(element); + AutomationElementCollection children; + try + { + children = current.FindAll(TreeScope.Children, System.Windows.Automation.Condition.TrueCondition); + } + catch (Exception ex) when (ex is ElementNotAvailableException or COMException) + { + continue; + } + + for (var i = 0; i < children.Count; i++) + { + var child = children[i]; + if (child is not null) + queue.Enqueue((child, depth + 1)); + } } return candidates; From cd01a6b275d4dce7794938215e2f354908664751 Mon Sep 17 00:00:00 2001 From: redth Date: Wed, 20 May 2026 11:45:22 -0400 Subject: [PATCH 3/5] Address second-pass PR review feedback - WindowsAppDriver.ClickPoint: cast inputs.Length to uint when comparing against SendInput's uint return value so the Windows build compiles. - AppDriverFactory: include 'wpf' in the ArgumentException message so the error reflects the actual accepted platform list. - DevFlowAgentService.CaptureUiOrNativeAsync: add a single in-flight gate (_pendingCaptureUiTask) so repeated tree/query calls don't accumulate unbounded queued UI-dispatch work while the dispatcher is blocked. Subsequent callers fall straight through to a native-only probe until the previously queued uiTask drains. - NativeWindowProbe: add ExtractHwndsFromId helper that parses the embedded HWND from a 'native:hwnd:0x..' element id. - PlatformVisualTreeWalker and WpfVisualTreeWalker: on cache miss, re-walk the native tree seeded with the HWND parsed from the requested id so ':dialog:{n}' prefixes from AppendKnownWindowDialogSubtrees are regenerated and id resolution remains stable across calls. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../DevFlowAgentService.cs | 55 ++++++++++++++++++- .../WpfVisualTreeWalker.cs | 10 +++- .../VisualTreeWalker.cs | 10 +++- .../Windows/NativeWindowProbe.cs | 35 ++++++++++++ .../AppDriverFactory.cs | 2 +- .../WindowsAppDriver.cs | 2 +- 6 files changed, 106 insertions(+), 8 deletions(-) diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs index b9de6a17e..24d405cf2 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.Core/DevFlowAgentService.cs @@ -58,6 +58,11 @@ public partial class DevFlowAgentService : IDisposable, IMarkerPublisher private int _uiHookScanInFlight; private DateTime _lastUiHookScanTsUtc = DateTime.MinValue; private const int NativeUiProbeTimeoutMs = 1500; + // Tracks a previously-dispatched UI capture task that timed out. If still + // pending when a new CaptureUiOrNativeAsync arrives we skip enqueuing another + // uiCallback to avoid unbounded queueing on a blocked dispatcher. + private Task? _pendingCaptureUiTask; + private readonly object _pendingCaptureUiGate = new(); private Shell? _hookedShell; private DateTime? _navigationStartedAtUtc; private string? _navigationTargetRoute; @@ -777,6 +782,37 @@ private async Task> CaptureUiOrNativeAsync( if (!_treeWalker.SupportsNativeElements) return await DispatchAsync(uiCallback); + // Gate: if a previous CaptureUiOrNativeAsync's UI dispatch is still + // pending (the dispatcher is blocked), skip enqueuing another one and + // go native-only. Otherwise repeated tree/query calls while the UI + // thread is blocked would accumulate unbounded queued work. + Task? priorPending; + lock (_pendingCaptureUiGate) + priorPending = _pendingCaptureUiTask; + + if (priorPending is not null && !priorPending.IsCompleted) + { + try + { + var hwnds = _app is null + ? Array.Empty() + : _treeWalker.GetKnownNativeWindowHandles(_app, windowIndex); + return await Task.Run(() => + { + try { return nativeCallback(hwnds); } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Native UI probe failed (gated): {ex.GetBaseException().Message}"); + return new List(); + } + }).ConfigureAwait(false); + } + catch + { + return new List(); + } + } + var hwndSource = new TaskCompletionSource>(TaskCreationOptions.RunContinuationsAsynchronously); // Shared CTS so the surviving Task.Delay timers can be cancelled once a race // is decided. Without this, every CaptureUiOrNativeAsync call leaves up to @@ -841,14 +877,29 @@ private async Task> CaptureUiOrNativeAsync( var uiWinner = await Task.WhenAny(uiTask, uiDelay).ConfigureAwait(false); if (uiWinner != uiTask) { + // Record this pending uiTask so concurrent callers can detect the + // dispatcher is blocked and avoid enqueuing additional UI work. + lock (_pendingCaptureUiGate) + _pendingCaptureUiTask = uiTask; + hwndSource.TrySetResult(Array.Empty()); // The UI dispatcher is blocked (the exact scenario this code path targets). // Observe any later fault on the abandoned uiTask so it doesn't trigger // TaskScheduler.UnobservedTaskException when it eventually completes. _ = uiTask.ContinueWith( - t => System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Abandoned uiTask faulted: {t.Exception?.GetBaseException().Message}"), + t => + { + lock (_pendingCaptureUiGate) + { + if (ReferenceEquals(_pendingCaptureUiTask, t)) + _pendingCaptureUiTask = null; + } + + if (t.IsFaulted) + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] Abandoned uiTask faulted: {t.Exception?.GetBaseException().Message}"); + }, CancellationToken.None, - TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously, + TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default); // Also bound the native await: NativeUiProbeTimeoutMs only guards the HWND diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs index 711ba8400..fd84d9735 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfVisualTreeWalker.cs @@ -203,7 +203,12 @@ public override List WalkNativeTree(IReadOnlyList knownWind return cached; } - WalkNativeTree(Array.Empty()); + // Preserve native ID stability on cache miss: re-walk with the same HWND + // the id was originally produced under. A plain Array.Empty() walk + // would skip AppendKnownWindowDialogSubtrees entirely, so ":dialog:{n}" + // prefixes from a previous tree/query would never be regenerated. + var seedHwnds = NativeWindowProbe.ExtractHwndsFromId(id); + WalkNativeTree(seedHwnds); lock (_nativeObjectsLock) return NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id); } @@ -220,7 +225,8 @@ public override List WalkNativeTree(IReadOnlyList knownWind if (NativeWindowProbe.TryBuildCachedElementInfo(cache, id) is { } cached) return cached; - return FlattenElementInfos(WalkNativeTree(Array.Empty())) + var seedHwnds = NativeWindowProbe.ExtractHwndsFromId(id); + return FlattenElementInfos(WalkNativeTree(seedHwnds)) .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs index ee3316027..187fabafe 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/VisualTreeWalker.cs @@ -665,7 +665,12 @@ public override List WalkNativeTree(IReadOnlyList knownWind return cached; } - WalkNativeTree(Array.Empty()); + // Preserve native ID stability on cache miss: re-walk with the same HWND + // the id was originally produced under. A plain Array.Empty() walk + // would skip AppendKnownWindowDialogSubtrees entirely, so ":dialog:{n}" + // prefixes from a previous tree/query would never be regenerated. + var seedHwnds = NativeWindowProbe.ExtractHwndsFromId(id); + WalkNativeTree(seedHwnds); lock (_nativeObjectsLock) return NativeWindowProbe.TryGetAutomationElement(_nativeObjects, id); } @@ -682,7 +687,8 @@ public override List WalkNativeTree(IReadOnlyList knownWind if (NativeWindowProbe.TryBuildCachedElementInfo(cache, id) is { } cached) return cached; - return FlattenElementInfos(WalkNativeTree(Array.Empty())) + var seedHwnds = NativeWindowProbe.ExtractHwndsFromId(id); + return FlattenElementInfos(WalkNativeTree(seedHwnds)) .FirstOrDefault(e => e.Id.Equals(id, StringComparison.OrdinalIgnoreCase)); } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs index f5f00bed2..61a847b45 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs @@ -86,6 +86,41 @@ public void AppendForeignTopLevelWindows( public static AutomationElement? TryGetAutomationElement(IReadOnlyDictionary nativeObjects, string id) => nativeObjects.TryGetValue(id, out var native) && native is AutomationElement element ? element : null; + /// + /// Parses HWND seeds out of a DevFlow native element id of the form + /// native:hwnd:0x{HEX}[:dialog:{N}...]. Returns an empty array when the + /// id doesn't carry an embedded HWND. Used to keep ID generation stable across + /// cache-miss re-walks (without a seed, AppendKnownWindowDialogSubtrees + /// would never run and dialog-scoped ids would never be regenerated). + /// + public static IReadOnlyList ExtractHwndsFromId(string id) + { + if (string.IsNullOrEmpty(id)) + return Array.Empty(); + + const string prefix = "native:hwnd:0x"; + var start = id.IndexOf(prefix, StringComparison.Ordinal); + if (start < 0) + return Array.Empty(); + + var hexStart = start + prefix.Length; + var hexEnd = hexStart; + while (hexEnd < id.Length && IsHexDigit(id[hexEnd])) + hexEnd++; + + if (hexEnd == hexStart) + return Array.Empty(); + + var hex = id.AsSpan(hexStart, hexEnd - hexStart); + if (!long.TryParse(hex, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture, out var hwndValue)) + return Array.Empty(); + + return new[] { new IntPtr(hwndValue) }; + + static bool IsHexDigit(char c) => + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + /// /// Rebuilds an for a previously-cached /// without performing a fresh process-wide window enumeration. Returns null when the cached diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs index 999c29bbe..0d5b8b386 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/AppDriverFactory.cs @@ -14,7 +14,7 @@ public static IAppDriver Create(string platform) "ios" or "iossimulator" => new iOSSimulatorAppDriver(), "windows" or "win" or "winui" or "wpf" => new WindowsAppDriver(), "linux" or "gtk" => new LinuxAppDriver(), - _ => throw new ArgumentException($"Unknown platform: {platform}. Supported: maccatalyst, android, ios, windows, linux") + _ => throw new ArgumentException($"Unknown platform: {platform}. Supported: maccatalyst, android, ios, windows, wpf, linux") }; } } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs index 8826163d6..97f7a27f9 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs @@ -720,7 +720,7 @@ private static bool ClickPoint(int x, int y) new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTDOWN } } }, new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTUP } } } }; - return SendInput((uint)inputs.Length, inputs, Marshal.SizeOf()) == inputs.Length; + return SendInput((uint)inputs.Length, inputs, Marshal.SizeOf()) == (uint)inputs.Length; } private static ushort MapKeyToVirtualKey(string key) => key.ToUpperInvariant() switch From 0b6cebee650513cf10f63d8f93d694458e3df946 Mon Sep 17 00:00:00 2001 From: redth Date: Wed, 20 May 2026 12:02:23 -0400 Subject: [PATCH 4/5] Address third-pass PR review feedback for Windows dialog automation - NativeWindowProbe: zero-extend HWND ints to avoid sign-extension when comparing to MAUI-supplied window handles (line 64 and IsDialogCandidate) - NativeWindowProbe: disambiguate BuildId collisions by appending the visual path when an id already exists in nativeObjects - WpfAgentService.TryScheduleNativeTapFirst: wrap dispatched Invoke/Toggle/RaiseEvent lambdas in try/catch so a stale or disabled element does not surface via DispatcherUnhandledException - DevFlowAgentService (WinUI): wrap DispatcherQueue.TryEnqueue lambda in try/catch to avoid CoreApplication.UnhandledErrorDetected on stale invoke targets - WindowsAppDriver.ClickPoint: capture and restore the cursor position around the synthetic click to avoid permanently relocating the user's cursor Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../WpfAgentService.cs | 41 +++++++++++++++++-- .../DevFlowAgentService.cs | 15 ++++++- .../Windows/NativeWindowProbe.cs | 18 +++++++- .../WindowsAppDriver.cs | 26 +++++++++++- 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs index 119a07a6c..fb62f6a4b 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent.WPF/WpfAgentService.cs @@ -145,22 +145,57 @@ protected override bool TryScheduleNativeTapFirst(VisualElement ve) var peer = System.Windows.Automation.Peers.UIElementAutomationPeer.FromElement(buttonBase) ?? System.Windows.Automation.Peers.UIElementAutomationPeer.CreatePeerForElement(buttonBase); + // Wrap the dispatched lambdas in try/catch: WPF surfaces unhandled + // BeginInvoke exceptions through Application.DispatcherUnhandledException, + // which can crash the host app if it's not subscribed (or rethrows). The + // common cases (button disabled or stale by the time the dispatcher + // runs the work) are expected and should be silently dropped. if (peer?.GetPattern(System.Windows.Automation.Peers.PatternInterface.Invoke) is System.Windows.Automation.Provider.IInvokeProvider invoke) { - buttonBase.Dispatcher.BeginInvoke(() => invoke.Invoke()); + buttonBase.Dispatcher.BeginInvoke(() => + { + try { invoke.Invoke(); } + catch (Exception ex) when (ex is System.Windows.Automation.ElementNotEnabledException + or System.Windows.Automation.ElementNotAvailableException + or System.Runtime.InteropServices.COMException + or InvalidOperationException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] WPF native invoke skipped: {ex.GetBaseException().Message}"); + } + }); return true; } if (peer?.GetPattern(System.Windows.Automation.Peers.PatternInterface.Toggle) is System.Windows.Automation.Provider.IToggleProvider toggle) { - buttonBase.Dispatcher.BeginInvoke(() => toggle.Toggle()); + buttonBase.Dispatcher.BeginInvoke(() => + { + try { toggle.Toggle(); } + catch (Exception ex) when (ex is System.Windows.Automation.ElementNotEnabledException + or System.Windows.Automation.ElementNotAvailableException + or System.Runtime.InteropServices.COMException + or InvalidOperationException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] WPF native toggle skipped: {ex.GetBaseException().Message}"); + } + }); return true; } buttonBase.Dispatcher.BeginInvoke(() => - buttonBase.RaiseEvent(new System.Windows.RoutedEventArgs(ButtonBase.ClickEvent, buttonBase))); + { + try + { + buttonBase.RaiseEvent(new System.Windows.RoutedEventArgs(ButtonBase.ClickEvent, buttonBase)); + } + catch (Exception ex) when (ex is InvalidOperationException + or System.Runtime.InteropServices.COMException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] WPF RaiseEvent skipped: {ex.GetBaseException().Message}"); + } + }); return true; } catch { } diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs index b6cdf275f..99a36f172 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/DevFlowAgentService.cs @@ -542,7 +542,20 @@ protected override bool TryScheduleNativeTapFirst(VisualElement ve) Microsoft.UI.Xaml.Automation.Peers.FrameworkElementAutomationPeer.CreatePeerForElement(buttonBase); if (peer?.GetPattern(Microsoft.UI.Xaml.Automation.Peers.PatternInterface.Invoke) is Microsoft.UI.Xaml.Automation.Provider.IInvokeProvider invokeProvider) { - return buttonBase.DispatcherQueue.TryEnqueue(() => invokeProvider.Invoke()); + // Wrap the dispatched lambda so a stale/disabled element doesn't + // surface as CoreApplication.UnhandledErrorDetected and crash the + // host app. The TryEnqueue bool only reports whether the work + // item was queued, not whether the invoke itself succeeded. + return buttonBase.DispatcherQueue.TryEnqueue(() => + { + try { invokeProvider.Invoke(); } + catch (Exception ex) when (ex is System.Runtime.InteropServices.COMException + or InvalidOperationException + or UnauthorizedAccessException) + { + System.Diagnostics.Debug.WriteLine($"[Microsoft.Maui.DevFlow] WinUI native invoke skipped: {ex.GetBaseException().Message}"); + } + }); } } #endif diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs index 61a847b45..88e9c3d13 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Agent/Windows/NativeWindowProbe.cs @@ -61,7 +61,12 @@ public void AppendForeignTopLevelWindows( IntPtr hwnd; try { - hwnd = new IntPtr(window.Current.NativeWindowHandle); + // Zero-extend the int handle: AutomationElement.NativeWindowHandle is a + // signed 32-bit value, but valid HWNDs above 0x7FFFFFFF are negative + // when reinterpreted as int. new IntPtr(int) sign-extends, producing + // 0xFFFFFFFF_AABB0001 instead of 0x00000000_AABB0001 on 64-bit, so + // .ToInt64() comparisons against the MAUI-supplied handles miss. + hwnd = new IntPtr(unchecked((long)(uint)window.Current.NativeWindowHandle)); } catch (ElementNotAvailableException) { @@ -356,7 +361,10 @@ private static bool IsDialogCandidate(AutomationElement element, IntPtr rootHwnd return false; } - if (current.NativeWindowHandle != 0 && current.NativeWindowHandle == rootHwnd.ToInt64()) + // Zero-extend the int NativeWindowHandle the same way EnumerateProcessTopLevels + // does, so this comparison matches valid HWNDs above 0x7FFFFFFF on 64-bit. + var nativeHandle = unchecked((long)(uint)current.NativeWindowHandle); + if (nativeHandle != 0 && nativeHandle == rootHwnd.ToInt64()) return false; if (TryGetIsModal(element) == true) @@ -513,6 +521,12 @@ private static IReadOnlyList EnumerateProcessTopLevels() } var id = BuildId(current, prefix, path); + // Sanitize() collapses non-alphanumerics to '_', so siblings with names that + // sanitize identically (e.g. "Don't Allow" and "Don_t Allow") would otherwise + // overwrite each other in nativeObjects and a later action-by-id would + // invoke the wrong element. Disambiguate by appending the tree path. + if (nativeObjects.ContainsKey(id)) + id = $"{id}:path:{string.Join(".", path)}"; nativeObjects[id] = element; var info = Map(element, current, id, isRoot); diff --git a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs index 97f7a27f9..4896c45f4 100644 --- a/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs +++ b/src/DevFlow/Microsoft.Maui.DevFlow.Driver/WindowsAppDriver.cs @@ -712,6 +712,11 @@ private static bool ClickElementCenter(IUIAutomationElement element) private static bool ClickPoint(int x, int y) { + // Save the user's current cursor position so we can restore it after the + // synthetic click — keeping the cursor permanently relocated to the click + // target would interfere with concurrent manual testing on the same machine. + var hadOriginal = GetCursorPos(out var originalCursor); + if (!SetCursorPos(x, y)) return false; @@ -720,7 +725,15 @@ private static bool ClickPoint(int x, int y) new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTDOWN } } }, new() { type = INPUT_MOUSE, u = new INPUTUNION { mi = new MOUSEINPUT { dwFlags = MOUSEEVENTF_LEFTUP } } } }; - return SendInput((uint)inputs.Length, inputs, Marshal.SizeOf()) == (uint)inputs.Length; + var sent = SendInput((uint)inputs.Length, inputs, Marshal.SizeOf()) == (uint)inputs.Length; + + if (hadOriginal) + { + // Restoring the cursor is best-effort; ignore failures. + _ = SetCursorPos(originalCursor.X, originalCursor.Y); + } + + return sent; } private static ushort MapKeyToVirtualKey(string key) => key.ToUpperInvariant() switch @@ -745,6 +758,17 @@ private static bool ClickPoint(int x, int y) [DllImport("user32.dll")] private static extern bool SetCursorPos(int x, int y); + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool GetCursorPos(out POINT lpPoint); + + [StructLayout(LayoutKind.Sequential)] + private struct POINT + { + public int X; + public int Y; + } + [DllImport("user32.dll")] private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint processId); From 74a3089e4bb2ce53b0169f649d9c0d2518e24cb9 Mon Sep 17 00:00:00 2001 From: redth Date: Wed, 20 May 2026 12:18:43 -0400 Subject: [PATCH 5/5] Install full MAUI workload set in DevFlow integration jobs Windows integration tests need the macos workload to compile shared code that references Apple TFMs. Standardize all four DevFlow integration jobs (android, apple, maccatalyst, windows) on the same broad workload set: maui ios macos maccatalyst tvos android wasm-tools. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/devflow-integration.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/devflow-integration.yml b/.github/workflows/devflow-integration.yml index 7406a2c27..da0093708 100644 --- a/.github/workflows/devflow-integration.yml +++ b/.github/workflows/devflow-integration.yml @@ -118,8 +118,8 @@ jobs: with: global-json-file: global.json - - name: Install Android MAUI workload - run: dotnet workload install maui-android + - name: Install MAUI workloads + run: dotnet workload install maui-android android wasm-tools - name: Cache AVD uses: actions/cache@v4 @@ -215,7 +215,7 @@ jobs: global-json-file: global.json - name: Install Apple MAUI workloads - run: dotnet workload install maui ios macos + run: dotnet workload install maui ios macos maccatalyst tvos android wasm-tools - name: List available iOS simulators run: xcrun simctl list devices available @@ -281,7 +281,7 @@ jobs: global-json-file: global.json - name: Install Apple MAUI workloads - run: dotnet workload install maui maccatalyst macos + run: dotnet workload install maui ios macos maccatalyst tvos android wasm-tools - name: Run DevFlow integration tests (maccatalyst) run: > @@ -315,7 +315,7 @@ jobs: global-json-file: global.json - name: Install MAUI workloads - run: dotnet workload install maui + run: dotnet workload install maui ios macos maccatalyst tvos android wasm-tools - name: Run DevFlow integration tests (windows) shell: pwsh