diff --git a/assistant/src/config/bundled-skills/computer-use/TOOLS.json b/assistant/src/config/bundled-skills/computer-use/TOOLS.json index 98b9dc22d4d..ec2885fc37b 100644 --- a/assistant/src/config/bundled-skills/computer-use/TOOLS.json +++ b/assistant/src/config/bundled-skills/computer-use/TOOLS.json @@ -250,6 +250,10 @@ "type": "string", "description": "The name of the application to open (e.g. \"Slack\", \"Safari\", \"Google Chrome\", \"VS Code\")" }, + "app_bundle_id": { + "type": "string", + "description": "Bundle identifier of the app (e.g. com.apple.Safari). If provided, used for precise app activation." + }, "reasoning": { "type": "string", "description": "Explanation of why you need to open or switch to this app" diff --git a/assistant/src/daemon/computer-use-session.ts b/assistant/src/daemon/computer-use-session.ts index 465bbee6dc2..24e8e665ed8 100644 --- a/assistant/src/daemon/computer-use-session.ts +++ b/assistant/src/daemon/computer-use-session.ts @@ -684,6 +684,11 @@ export class ComputerUseSession { isError: true, }; } + + // Inject targetAppBundleId when the LLM didn't provide one + if (!input.app_bundle_id && this.targetAppBundleId) { + input = { ...input, app_bundle_id: this.targetAppBundleId }; + } } if (toolName === 'computer_use_run_applescript') { diff --git a/assistant/src/tools/computer-use/definitions.ts b/assistant/src/tools/computer-use/definitions.ts index b2c5b93c566..39d971d7652 100644 --- a/assistant/src/tools/computer-use/definitions.ts +++ b/assistant/src/tools/computer-use/definitions.ts @@ -302,6 +302,10 @@ export const computerUseOpenAppTool: Tool = { type: 'string', description: 'The name of the application to open (e.g. "Slack", "Safari", "Google Chrome", "VS Code")', }, + app_bundle_id: { + type: 'string', + description: 'Bundle identifier of the app (e.g. com.apple.Safari). If provided, used for precise app activation.', + }, reasoning: { type: 'string', description: 'Explanation of why you need to open or switch to this app', diff --git a/clients/macos/vellum-assistant/ComputerUse/ActionExecutor.swift b/clients/macos/vellum-assistant/ComputerUse/ActionExecutor.swift index d103f1103c1..8d16686aed8 100644 --- a/clients/macos/vellum-assistant/ComputerUse/ActionExecutor.swift +++ b/clients/macos/vellum-assistant/ComputerUse/ActionExecutor.swift @@ -235,7 +235,7 @@ final class ActionExecutor: ActionExecuting { try drag(from: CGPoint(x: fromX, y: fromY), to: CGPoint(x: endX, y: endY)) case .openApp: guard let appName = action.appName else { throw ExecutorError.appNotFound("(no name)") } - try await openApp(name: appName) + try await openApp(name: appName, bundleId: action.appBundleId) case .runAppleScript: guard let source = action.script else { throw ExecutorError.appleScriptMissingScript } return try await runAppleScript(source) diff --git a/clients/macos/vellum-assistant/ComputerUse/ActionTypes.swift b/clients/macos/vellum-assistant/ComputerUse/ActionTypes.swift index e92df88654e..1fe556d7dbd 100644 --- a/clients/macos/vellum-assistant/ComputerUse/ActionTypes.swift +++ b/clients/macos/vellum-assistant/ComputerUse/ActionTypes.swift @@ -36,6 +36,7 @@ struct AgentAction: Codable { var summary: String? var waitDuration: Int? var appName: String? + var appBundleId: String? var script: String? var reasoning: String var resolvedFromElementId: Int? @@ -56,6 +57,7 @@ struct AgentAction: Codable { summary: String? = nil, waitDuration: Int? = nil, appName: String? = nil, + appBundleId: String? = nil, script: String? = nil, resolvedFromElementId: Int? = nil, resolvedToElementId: Int? = nil, @@ -74,6 +76,7 @@ struct AgentAction: Codable { self.summary = summary self.waitDuration = waitDuration self.appName = appName + self.appBundleId = appBundleId self.script = script self.resolvedFromElementId = resolvedFromElementId self.resolvedToElementId = resolvedToElementId diff --git a/clients/macos/vellum-assistant/ComputerUse/Session.swift b/clients/macos/vellum-assistant/ComputerUse/Session.swift index 0ce3c505b59..b77f9a9563a 100644 --- a/clients/macos/vellum-assistant/ComputerUse/Session.swift +++ b/clients/macos/vellum-assistant/ComputerUse/Session.swift @@ -912,6 +912,8 @@ final class ComputerUseSession: ObservableObject { ?? extractInt(from: msg.input, key: "wait_duration") let appName = msg.input["app_name"]?.value as? String ?? msg.input["appName"]?.value as? String + let appBundleId = msg.input["app_bundle_id"]?.value as? String + ?? msg.input["appBundleId"]?.value as? String let script = msg.input["script"]?.value as? String let elementId = extractInt(from: msg.input, key: "element_id") ?? extractInt(from: msg.input, key: "elementId") @@ -934,6 +936,7 @@ final class ComputerUseSession: ObservableObject { summary: summary, waitDuration: waitDuration, appName: appName, + appBundleId: appBundleId, script: script, resolvedFromElementId: elementId, resolvedToElementId: toElementId,