Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 33 additions & 37 deletions assistant/src/cli/commands/clients.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ Examples:
.option("--json", "Machine-readable compact JSON output")
.option(
"--capability <name>",
"Filter to clients supporting this capability (e.g. host_bash, host_file, host_cu, host_browser)",
"Filter to clients supporting this capability (e.g. host_bash, host_file, host_cu, host_browser, host_app_control)",
)
.addHelpText(
"after",
`
Options:
--json Output as compact JSON instead of a table.
--capability <name> Only show clients that support the named capability.
Valid values: host_bash, host_file, host_cu, host_browser.
Valid values: host_bash, host_file, host_cu, host_browser, host_app_control.

The table shows each client's ID, interface type, capabilities,
connection timestamps, and host environment (when available).
Expand Down Expand Up @@ -134,13 +134,13 @@ Examples:
},
);

clients
.command("disconnect <clientId>")
.description("Force-disconnect a client by its ID")
.option("--json", "Machine-readable compact JSON output")
.addHelpText(
"after",
`
clients
.command("disconnect <clientId>")
.description("Force-disconnect a client by its ID")
.option("--json", "Machine-readable compact JSON output")
.addHelpText(
"after",
`
Arguments:
clientId The UUID of the client to disconnect (from \`clients list\`).

Expand All @@ -151,34 +151,30 @@ reconnect automatically depending on its implementation.
Examples:
$ assistant clients disconnect a1a30bde-6679-406c-bc32-d5a0d2a7a99e
$ assistant clients disconnect a1a30bde-6679-406c-bc32-d5a0d2a7a99e --json`,
)
.action(
async (
clientId: string,
opts: { json?: boolean },
cmd: Command,
) => {
const result = await cliIpcCall<DisconnectClientResponse>(
"disconnect_client",
{ body: { clientId } },
);

if (!result.ok) {
log.error(result.error ?? "Failed to disconnect client");
process.exitCode = 1;
return;
}

if (opts.json) {
writeOutput(cmd, result.result!);
return;
}

log.info(
`Disconnected client ${clientId} (${result.result!.disconnected} subscriber${result.result!.disconnected === 1 ? "" : "s"} disposed)`,
);
},
);
)
.action(
async (clientId: string, opts: { json?: boolean }, cmd: Command) => {
const result = await cliIpcCall<DisconnectClientResponse>(
"disconnect_client",
{ body: { clientId } },
);

if (!result.ok) {
log.error(result.error ?? "Failed to disconnect client");
process.exitCode = 1;
return;
}

if (opts.json) {
writeOutput(cmd, result.result!);
return;
}

log.info(
`Disconnected client ${clientId} (${result.result!.disconnected} subscriber${result.result!.disconnected === 1 ? "" : "s"} disposed)`,
);
},
);
}

function formatRelativeTime(iso: string): string {
Expand Down
1 change: 1 addition & 0 deletions assistant/src/runtime/routes/events-routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ export function handleSubscribeAssistantEvents(
"host_bash",
"host_file",
"host_cu",
"host_app_control",
"host_browser",
];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,23 @@ extension AppDelegate {
}
self.inFlightCuTasks[msg.requestId] = task

case .hostAppControlRequest(let msg):
let task = Task { @MainActor in
defer { self.inFlightAppControlTasks.removeValue(forKey: msg.requestId) }

guard !Task.isCancelled else { return }
let result = await AppControlExecutor.perform(msg)
guard !Task.isCancelled else { return }

// Suppress stale POST if cancelled
if HostToolExecutor.isCancelledAndConsume(msg.requestId) {
log.debug("Host app-control result suppressed (cancelled) — requestId=\(msg.requestId, privacy: .public)")
return
}
_ = await HostProxyClient().postAppControlResult(result)
}
self.inFlightAppControlTasks[msg.requestId] = task

case .hostBrowserRequest(let msg):
self.hostBrowserExecutor.execute(msg)
case .hostBrowserCancel(let msg):
Expand All @@ -410,6 +427,8 @@ extension AppDelegate {
HostToolExecutor.cancelHostFileRequest(msg.requestId)
case .hostCuCancel(let msg):
self.cancelHostCuRequest(msg.requestId)
case .hostAppControlCancel(let msg):
self.cancelHostAppControlRequest(msg.requestId)

// Signing identity
case .signBundlePayload(let msg):
Expand Down Expand Up @@ -486,6 +505,19 @@ extension AppDelegate {
log.info("Cancelling host CU — requestId=\(requestId, privacy: .public)")
}

// MARK: - Host App Control Cancel

/// Cancels a host app-control request that may still be running.
///
/// The request ID is first flagged through `HostToolExecutor.markCancelled`;
/// then, if a Swift Task is registered for it in `inFlightAppControlTasks`,
/// that entry is removed and the task is cancelled. There is no overlay to
/// dismiss for app-control; the daemon-side proxy resolves the awaiter on
/// cancellation.
///
/// - Parameter requestId: Identifier of the app-control request to cancel.
func cancelHostAppControlRequest(_ requestId: String) {
    HostToolExecutor.markCancelled(requestId)
    // Removing and cancelling in one step; a missing entry is simply a no-op.
    inFlightAppControlTasks.removeValue(forKey: requestId)?.cancel()
    log.info("Cancelling host app-control — requestId=\(requestId, privacy: .public)")
}

// MARK: - Signing Identity

/// Handle a sign_bundle_payload request from the assistant.
Expand Down
2 changes: 2 additions & 0 deletions clients/macos/vellum-assistant/App/AppDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public final class AppDelegate: NSObject, NSApplicationDelegate {
var hostCuOverlayCancellables = Set<AnyCancellable>()
/// In-flight CU tasks keyed by request ID, for cancel support.
var inFlightCuTasks: [String: Task<Void, Never>] = [:]
/// In-flight host app-control tasks keyed by request ID, for cancel support.
var inFlightAppControlTasks: [String: Task<Void, Never>] = [:]
/// Executor for host browser (CDP) requests.
let hostBrowserExecutor = HostBrowserExecutor()
var isStartingSession = false
Expand Down
161 changes: 161 additions & 0 deletions clients/macos/vellum-assistantTests/AppControlConnectionTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import XCTest
@testable import VellumAssistantLib
@testable import VellumAssistantShared

/// Decoder coverage for the app-control SSE message envelopes.
///
/// Each test feeds a raw JSON envelope through `JSONDecoder` and checks that
/// the `host_app_control_request` / `host_app_control_cancel` wire types are
/// surfaced as the matching `ServerMessage` cases. Without those decoder
/// cases, the daemon's app-control proxy would never reach
/// `AppControlExecutor` on the macOS client.
final class AppControlConnectionTests: XCTestCase {

    /// Decodes a raw JSON envelope string into a `ServerMessage`.
    private func decodeMessage(_ rawJSON: String) throws -> ServerMessage {
        let envelopeData = Data(rawJSON.utf8)
        return try JSONDecoder().decode(ServerMessage.self, from: envelopeData)
    }

    // MARK: - host_app_control_request envelope

    /// A `press` input decodes with its app, key, modifiers and duration.
    func test_decodes_hostAppControlRequest_pressVariant() throws {
        let envelope = #"""
        {
        "type": "host_app_control_request",
        "requestId": "req-app-1",
        "conversationId": "conv-1",
        "toolName": "app_control_press",
        "input": {
        "tool": "press",
        "app": "com.apple.Safari",
        "key": "Return",
        "modifiers": ["cmd"],
        "durationMs": 50
        }
        }
        """#

        let decoded = try decodeMessage(envelope)

        guard case let .hostAppControlRequest(request) = decoded else {
            return XCTFail("Expected .hostAppControlRequest, got \(decoded)")
        }
        XCTAssertEqual(request.type, "host_app_control_request")
        XCTAssertEqual(request.requestId, "req-app-1")
        XCTAssertEqual(request.conversationId, "conv-1")
        XCTAssertEqual(request.toolName, "app_control_press")

        guard case let .press(app, key, modifiers, durationMs) = request.input else {
            return XCTFail("Expected .press input variant, got \(request.input)")
        }
        XCTAssertEqual(app, "com.apple.Safari")
        XCTAssertEqual(key, "Return")
        XCTAssertEqual(modifiers, ["cmd"])
        XCTAssertEqual(durationMs, 50)
    }

    /// A `click` input decodes with its app, coordinates, button and
    /// double-click flag.
    func test_decodes_hostAppControlRequest_clickVariant() throws {
        let envelope = #"""
        {
        "type": "host_app_control_request",
        "requestId": "req-app-2",
        "conversationId": "conv-2",
        "toolName": "app_control_click",
        "input": {
        "tool": "click",
        "app": "com.apple.Safari",
        "x": 100,
        "y": 200,
        "button": "left",
        "double": false
        }
        }
        """#

        let decoded = try decodeMessage(envelope)

        guard case let .hostAppControlRequest(request) = decoded else {
            return XCTFail("Expected .hostAppControlRequest, got \(decoded)")
        }
        XCTAssertEqual(request.requestId, "req-app-2")

        guard case let .click(app, x, y, button, isDouble) = request.input else {
            return XCTFail("Expected .click input variant, got \(request.input)")
        }
        XCTAssertEqual(app, "com.apple.Safari")
        XCTAssertEqual(x, 100)
        XCTAssertEqual(y, 200)
        XCTAssertEqual(button, "left")
        XCTAssertEqual(isDouble, false)
    }

    // MARK: - host_app_control_cancel envelope

    /// The cancel envelope decodes to `.hostAppControlCancel` and keeps its
    /// wire type and request ID.
    func test_decodes_hostAppControlCancel() throws {
        let envelope = #"""
        {
        "type": "host_app_control_cancel",
        "requestId": "req-app-1"
        }
        """#

        let decoded = try decodeMessage(envelope)

        guard case let .hostAppControlCancel(cancellation) = decoded else {
            return XCTFail("Expected .hostAppControlCancel, got \(decoded)")
        }
        XCTAssertEqual(cancellation.type, "host_app_control_cancel")
        XCTAssertEqual(cancellation.requestId, "req-app-1")
    }

    // MARK: - Existing host_cu_* still decode

    /// Regression guard: the app-control decoder cases must not break the
    /// CU envelope cases that existed before them.
    func test_decodes_hostCuCancel_stillWorks() throws {
        let envelope = #"""
        {
        "type": "host_cu_cancel",
        "requestId": "cu-req-1"
        }
        """#

        let decoded = try decodeMessage(envelope)

        guard case let .hostCuCancel(cancellation) = decoded else {
            return XCTFail("Expected .hostCuCancel, got \(decoded)")
        }
        XCTAssertEqual(cancellation.requestId, "cu-req-1")
    }

    // MARK: - Capability advertisement

    /// Pins the host-proxy capability identifiers the macOS client expects
    /// to handle locally.
    ///
    /// The daemon advertises capabilities during the SSE registration
    /// handshake (`/v1/events`). The literal source of truth for that list is
    /// the `ALL_CAPABILITIES` array in
    /// `assistant/src/runtime/routes/events-routes.ts`, filtered by
    /// `supportsHostProxy(id, cap)` for the connecting interface.
    ///
    /// NOTE(review): the set below is a literal declared inside this test, so
    /// the assertions only check that literal. Nothing here reads the
    /// daemon's list or any production Swift value; adding or removing a
    /// capability elsewhere will not fail this test unless the literal is
    /// edited too.
    func test_capabilityAdvertisement_includesHostCuAndHostAppControl() {
        let expectedCapabilities: Set<String> = [
            "host_bash",
            "host_file",
            "host_cu",
            "host_app_control",
            "host_browser",
        ]

        let advertisesCu = expectedCapabilities.contains("host_cu")
        XCTAssertTrue(
            advertisesCu,
            "host_cu must remain in the advertised capability set"
        )

        let advertisesAppControl = expectedCapabilities.contains("host_app_control")
        XCTAssertTrue(
            advertisesAppControl,
            "host_app_control must be advertised so the daemon routes app-control requests to this client"
        )
    }
}
4 changes: 4 additions & 0 deletions clients/shared/Network/EventStreamClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,10 @@ public final class EventStreamClient {
if locallyOwnedConversationIds.contains(msg.conversationId) { return false }
log.warning("Ignoring host_cu_request for non-local conversation \(msg.conversationId, privacy: .public)")
return true
case .hostAppControlRequest(let msg):
if locallyOwnedConversationIds.contains(msg.conversationId) { return false }
log.warning("Ignoring host_app_control_request for non-local conversation \(msg.conversationId, privacy: .public)")
return true
case .hostBrowserRequest(let msg):
if locallyOwnedConversationIds.contains(msg.conversationId) { return false }
log.warning("Ignoring host_browser_request for non-local conversation \(msg.conversationId, privacy: .public)")
Expand Down
26 changes: 26 additions & 0 deletions clients/shared/Network/HostProxyClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public protocol HostProxyClientProtocol {
func postBashResult(_ result: HostBashResultPayload) async -> Bool
func postFileResult(_ result: HostFileResultPayload) async -> Bool
func postCuResult(_ result: HostCuResultPayload) async -> Bool
func postAppControlResult(_ result: HostAppControlResultPayload) async -> Bool
func postBrowserResult(_ result: HostBrowserResultPayload) async -> Bool
func postTransferResult(_ result: HostTransferResultPayload) async -> Bool
func pullTransferContent(transferId: String) async throws -> Data
Expand Down Expand Up @@ -80,6 +81,31 @@ public struct HostProxyClient: HostProxyClientProtocol {
}
}

/// Posts a host app-control result to the gateway's
/// `host-app-control-result` path.
///
/// - Parameter result: The payload to JSON-encode and send.
/// - Returns: `true` when the gateway response reports success; `false` when
///   the response is not a success or when encoding/posting throws. Failures
///   are logged rather than rethrown.
public func postAppControlResult(_ result: HostAppControlResultPayload) async -> Bool {
    let baseTimeout: TimeInterval = 30

    do {
        let encoded = try JSONEncoder().encode(result)

        // A screenshot (`pngBase64`) can make the body large (~1-2 MB for a
        // full-window capture). Grow the timeout with the encoded size —
        // 5 extra seconds per MiB — so large payloads don't trigger
        // URLSession's cancellation race, mirroring postFileResult.
        var requestTimeout = baseTimeout
        if result.pngBase64 != nil {
            let encodedMiB = TimeInterval(encoded.count) / (1024 * 1024)
            requestTimeout = max(baseTimeout, encodedMiB * 5 + baseTimeout)
        }

        let response = try await GatewayHTTPClient.post(
            path: "host-app-control-result",
            body: encoded,
            timeout: requestTimeout
        )

        if response.isSuccess {
            return true
        }
        log.error("postAppControlResult failed (HTTP \(response.statusCode))")
        return false
    } catch {
        log.error("postAppControlResult error: \(error.localizedDescription)")
        return false
    }
}

public func postBrowserResult(_ result: HostBrowserResultPayload) async -> Bool {
do {
let body = try JSONEncoder().encode(result)
Expand Down
Loading
Loading