Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Cli/Microsoft.Maui.Cli/DevFlow/Mcp/McpServerHost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public static async Task RunAsync()
.WithTools<RecordingTools>()
.WithTools<PreferencesTools>()
.WithTools<PlatformTools>()
.WithTools<SensorTools>();
.WithTools<SensorTools>()
.WithTools<BatchTools>();

await builder.Build().RunAsync();
}
Expand Down
12 changes: 12 additions & 0 deletions src/Cli/Microsoft.Maui.Cli/DevFlow/Mcp/Tools/AgentTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ public static async Task<string> Wait(
return $"Timeout after {timeout}s — no agent connected" + (app != null ? $" matching '{app}'" : "") + ".";
}

/// <summary>
/// MCP tool <c>maui_capabilities</c>: asks the resolved agent for its capabilities and returns them as compact JSON.
/// </summary>
/// <param name="session">MCP session used to resolve the agent client.</param>
/// <param name="agentPort">Optional agent HTTP port, forwarded to the session when resolving the agent.</param>
/// <returns>
/// The capabilities serialized without indentation, or a fixed "not responding" message when the
/// agent call yields a JSON value of kind <c>Undefined</c>.
/// </returns>
[McpServerTool(Name = "maui_capabilities"), Description("Get the capabilities supported by the connected agent. Returns a JSON object describing available features (e.g., profiler, sensors, webview). Use this to check what the agent supports before calling other tools.")]
public static async Task<string> Capabilities(
    McpAgentSession session,
    [Description("Agent HTTP port (optional if only one agent connected)")] int? agentPort = null)
{
    var client = await session.GetAgentClientAsync(agentPort);
    var reported = await client.GetCapabilitiesAsync();

    // An Undefined value kind is treated as "the agent gave no answer".
    var gotAnswer = reported.ValueKind != System.Text.Json.JsonValueKind.Undefined;

    return gotAnswer
        ? CliJson.SerializeUntyped(reported, indented: false)
        : "Agent not responding. Is the app running?";
}

[McpServerTool(Name = "maui_select_agent"), Description("Set the default agent for this MCP session. Subsequent tool calls will use this agent automatically without needing agentPort.")]
public static string SelectAgent(
McpAgentSession session,
Expand Down
73 changes: 73 additions & 0 deletions src/Cli/Microsoft.Maui.Cli/DevFlow/Mcp/Tools/BatchTools.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System.ComponentModel;
using System.Text.Json;
using System.Text.Json.Nodes;
using ModelContextProtocol.Server;
using Microsoft.Maui.Cli.DevFlow.Mcp;

namespace Microsoft.Maui.Cli.DevFlow.Mcp.Tools;

/// <summary>
/// MCP tool container exposing <c>maui_batch</c>, which validates a JSON array of UI actions
/// and forwards it to the connected agent in a single request.
/// </summary>
[McpServerToolType]
public sealed class BatchTools
{
    /// <summary>
    /// Validates <paramref name="actionsJson"/> and sends the resulting actions to the agent as one batch.
    /// </summary>
    /// <param name="session">MCP session used to resolve the agent client.</param>
    /// <param name="actionsJson">
    /// JSON array of action objects. Every element must be a JSON object carrying a non-null
    /// <c>action</c> or <c>type</c> field.
    /// </param>
    /// <param name="continueOnError">Forwarded to the agent's batch call unchanged.</param>
    /// <param name="agentPort">Optional agent HTTP port, forwarded to the session when resolving the agent.</param>
    /// <returns>
    /// The agent's batch result serialized as compact JSON, or a human-readable validation message
    /// when the input is rejected before any agent call is made.
    /// </returns>
    [McpServerTool(Name = "maui_batch"), Description("""
        Execute multiple UI actions in a single request. Actions run sequentially and are not transactional.
        Earlier actions are applied even if a later action fails.
        The 'actionsJson' parameter must be a JSON array of action objects.
        Each action object must have an "action" (or "type") field specifying the operation.

        Supported actions and their fields:
        - {"action":"tap", "elementId":"<id>"}
        - {"action":"fill", "elementId":"<id>", "text":"<value>"}
        - {"action":"clear", "elementId":"<id>"}
        - {"action":"key", "key":"enter", "elementId":"<id>"}
        - {"action":"focus", "elementId":"<id>"}
        - {"action":"scroll", "elementId":"<id>", "deltaX":0, "deltaY":200}
        - {"action":"gesture", "type":"swipe", "elementId":"<id>", "direction":"up"}
        - {"action":"navigate", "route":"//page"}
        - {"action":"back"}

        Note: The backend accepts both "action" and "type" fields. For gesture actions,
        use "action":"gesture" with a separate "type" field for the gesture kind.

        Example: [{"action":"fill","elementId":"entry1","text":"hello"},{"action":"tap","elementId":"btn1"}]
        """)]
    public static async Task<string> Batch(
        McpAgentSession session,
        [Description("JSON array of action objects. Each must have an 'action' or 'type' field (see tool description for schema)")] string actionsJson,
        [Description("If true, continue executing remaining actions after a failure (default: false)")] bool continueOnError = false,
        [Description("Agent HTTP port (optional if only one agent connected)")] int? agentPort = null)
    {
        // JsonNode.Parse throws ArgumentNullException (not JsonException) for a null string, which
        // would bypass the catch below. Reject missing/blank input with a validation message instead.
        if (string.IsNullOrWhiteSpace(actionsJson))
            return "Invalid input: 'actionsJson' must be a non-empty JSON array.";

        JsonArray parsed;
        try
        {
            var node = JsonNode.Parse(actionsJson);
            if (node is not JsonArray array)
                return "Invalid input: 'actionsJson' must be a JSON array, not " + (node?.GetValueKind().ToString() ?? "null") + ".";

            parsed = array;
        }
        catch (JsonException ex)
        {
            return $"Invalid JSON in 'actionsJson': {ex.Message}";
        }

        if (parsed.Count == 0)
            return "Empty actions array — nothing to execute.";

        // Validate every element before contacting the agent so a malformed batch never runs partially.
        var actions = new List<JsonObject>(parsed.Count);
        for (int i = 0; i < parsed.Count; i++)
        {
            if (parsed[i] is not JsonObject obj)
                return $"Invalid action at index {i}: expected a JSON object, got {parsed[i]?.GetValueKind().ToString() ?? "null"}.";

            // A field that is absent or explicitly JSON null both read back as null here.
            if (obj["action"] is null && obj["type"] is null)
                return $"Invalid action at index {i}: must have an 'action' or 'type' field (e.g., 'tap', 'fill', 'navigate').";

            actions.Add(obj);
        }

        // NOTE(review): the objects are handed over while still parented to the parsed array;
        // fine for serialization, but confirm BatchAsync does not re-attach them to another node.
        var agent = await session.GetAgentClientAsync(agentPort);
        var result = await agent.BatchAsync(actions, continueOnError);
        return CliJson.SerializeUntyped(result, indented: false);
    }
}
55 changes: 55 additions & 0 deletions src/Cli/Microsoft.Maui.Cli/DevFlow/Mcp/Tools/InteractionTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,61 @@ public static async Task<string> Clear(
: $"Failed to clear element '{elementId}'.";
}

/// <summary>
/// MCP tool <c>maui_key</c>: sends a key press and/or typed text to the agent, optionally targeting an element.
/// </summary>
/// <param name="session">MCP session used to resolve the agent client.</param>
/// <param name="key">Key name to press. May be blank when only <paramref name="text"/> should be typed.</param>
/// <param name="elementId">Optional target element ID, forwarded to the agent unchanged.</param>
/// <param name="text">Optional text to type, forwarded to the agent unchanged.</param>
/// <param name="agentPort">Optional agent HTTP port, forwarded to the session when resolving the agent.</param>
/// <returns>A human-readable message describing what was sent, or why nothing was sent.</returns>
[McpServerTool(Name = "maui_key"), Description("Send a key press to an element. Supported keys for Entry/Editor/SearchBar: 'enter' (submit or newline), 'backspace' (delete last character). Use 'text' parameter to type characters. For reliable behavior, provide an element ID; omitting it may have no effect depending on the agent/platform implementation.")]
public static async Task<string> Key(
    McpAgentSession session,
    [Description("Key to press: 'enter', 'return', 'backspace', 'delete'")] string key,
    [Description("Target element ID. Optional, but omitting it may result in no action; provide an element ID for reliable behavior.")] string? elementId = null,
    [Description("Text to type character by character into the element")] string? text = null,
    [Description("Agent HTTP port (optional if only one agent connected)")] int? agentPort = null)
{
    var hasKey = !string.IsNullOrWhiteSpace(key);
    var hasText = !string.IsNullOrEmpty(text);

    // Fail fast: with neither a key nor text there is nothing for the agent to do, and the
    // previous behavior reported "Sent key ''" for such a call.
    if (!hasKey && !hasText)
        return "Nothing to send: provide a 'key' to press or 'text' to type.";

    // NOTE(review): a reviewer reported that the agent endpoint accepts either key or text and may
    // ignore text when combined with keys such as enter/return — not verifiable from this file; confirm.
    var agent = await session.GetAgentClientAsync(agentPort);
    var success = await agent.KeyAsync(key, elementId, text);

    if (!hasKey)
    {
        // Text-only request: describe it as typing rather than as a key press.
        if (!success)
            return "Failed to type text. The target element may not support keyboard input, or no target element was provided.";

        return elementId is not null
            ? $"Typed text into element '{elementId}'."
            : "Typed text without a target element; it may have had no effect.";
    }

    if (!success)
        return $"Failed to send key '{key}'. The target element may not support keyboard input, or no target element was provided.";

    return elementId is not null
        ? $"Sent key '{key}' to element '{elementId}'."
        : $"Sent key '{key}' without a target element; it may have had no effect.";
}

/// <summary>
/// MCP tool <c>maui_gesture</c>: validates the requested gesture and forwards it to the agent.
/// </summary>
/// <param name="session">MCP session used to resolve the agent client.</param>
/// <param name="type">Gesture type; trimmed and lower-cased, with <c>long-press</c> mapped to <c>longpress</c>.</param>
/// <param name="elementId">Optional target element ID, forwarded to the agent unchanged.</param>
/// <param name="direction">Swipe direction; only read (trimmed and lower-cased) when the gesture is a swipe.</param>
/// <param name="distance">Optional swipe distance, forwarded to the agent unchanged.</param>
/// <param name="durationMs">Optional gesture duration in milliseconds, forwarded to the agent unchanged.</param>
/// <param name="agentPort">Optional agent HTTP port, forwarded to the session when resolving the agent.</param>
/// <returns>A human-readable success, failure, or validation message.</returns>
[McpServerTool(Name = "maui_gesture"), Description("Perform a touch gesture on the app. Supported gesture types: 'swipe' (requires direction), 'tap', 'longpress', and 'long-press'. Use maui_tap for simple taps — this tool is for advanced gestures like swiping.")]
public static async Task<string> Gesture(
    McpAgentSession session,
    [Description("Gesture type: 'swipe', 'tap', 'longpress', or 'long-press'")] string type,
    [Description("Target element ID (optional)")] string? elementId = null,
    [Description("Swipe direction: 'up', 'down', 'left', or 'right' (required for swipe)")] string? direction = null,
    [Description("Swipe distance in pixels (optional, uses default if omitted)")] double? distance = null,
    [Description("Gesture duration in milliseconds (optional)")] int? durationMs = null,
    [Description("Agent HTTP port (optional if only one agent connected)")] int? agentPort = null)
{
    // Canonical form: trimmed, lower-case, with the hyphenated alias folded into "longpress".
    var gesture = (type ?? string.Empty).Trim().ToLowerInvariant() switch
    {
        "long-press" => "longpress",
        var other => other,
    };

    // NOTE(review): a reviewer reported that the agent handles 'longpress' by delegating to its tap
    // action (which would also make elementId effectively required for tap/longpress) — the agent
    // code is not visible here; confirm before tightening validation or rewording the description.
    if (gesture is not ("swipe" or "tap" or "longpress"))
        return $"Unsupported gesture type '{type}'. Supported types: swipe, tap, longpress, long-press.";

    // Direction is validated and forwarded for swipes only; other gestures send null.
    string? swipeDirection = null;
    if (gesture == "swipe")
    {
        swipeDirection = direction?.Trim().ToLowerInvariant();

        if (string.IsNullOrEmpty(swipeDirection))
            return "Swipe gesture requires a 'direction' parameter ('up', 'down', 'left', 'right').";

        if (swipeDirection is not ("up" or "down" or "left" or "right"))
            return $"Unsupported swipe direction '{direction}'. Supported directions: up, down, left, right.";
    }

    // NOTE(review): distance/durationMs are passed through as null when omitted; a reviewer flagged
    // that the agent's request model may use non-nullable fields — confirm how GestureAsync sends them.
    var agent = await session.GetAgentClientAsync(agentPort);
    var performed = await agent.GestureAsync(gesture, elementId, swipeDirection, distance, durationMs);

    if (!performed)
        return $"Failed to perform {gesture} gesture.";

    return elementId is null
        ? $"Performed {gesture} gesture."
        : $"Performed {gesture} gesture on element '{elementId}'.";
}

[McpServerTool(Name = "maui_scroll"), Description("Scroll a ScrollView, CollectionView, or ListView. Supports delta-based scrolling, scrolling to an item index, or scrolling an element into view.")]
public static async Task<string> Scroll(
McpAgentSession session,
Expand Down
12 changes: 12 additions & 0 deletions src/Cli/Microsoft.Maui.Cli/DevFlow/Mcp/Tools/NavigationTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ public static async Task<string> Navigate(
: $"Failed to navigate to '{route}'. Route may not exist in the Shell.";
}

/// <summary>
/// MCP tool <c>maui_back</c>: asks the resolved agent to navigate back and reports the outcome.
/// </summary>
/// <param name="session">MCP session used to resolve the agent client.</param>
/// <param name="agentPort">Optional agent HTTP port, forwarded to the session when resolving the agent.</param>
/// <returns>A fixed success message when the agent reports success, otherwise a fixed failure message.</returns>
[McpServerTool(Name = "maui_back"), Description("Go back in the app navigation stack. Equivalent to pressing the system back button.")]
public static async Task<string> Back(
    McpAgentSession session,
    [Description("Agent HTTP port (optional if only one agent connected)")] int? agentPort = null)
{
    var client = await session.GetAgentClientAsync(agentPort);

    if (await client.BackAsync())
        return "Navigated back successfully.";

    return "Failed to navigate back. Navigation stack may be empty.";
}

[McpServerTool(Name = "maui_focus"), Description("Set focus to a UI element.")]
public static async Task<string> Focus(
McpAgentSession session,
Expand Down
Loading