Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ docs/node_modules/
docs/.next/
docs/out/
docs/package-lock.json

# pnpm
.pnpm-store/
92 changes: 92 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,98 @@ Core workflow:

## Integrations

### iOS Simulator

Control real Mobile Safari in the iOS Simulator for authentic mobile web testing. Requires macOS with Xcode.

**Setup:**

```bash
# Install Appium and XCUITest driver
npm install -g appium
appium driver install xcuitest
```

**Usage:**

```bash
# List available iOS simulators
agent-browser device list

# Launch Safari on a specific device
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com

# Same commands as desktop
agent-browser -p ios snapshot -i
agent-browser -p ios tap @e1
agent-browser -p ios fill @e2 "text"
agent-browser -p ios screenshot mobile.png

# Mobile-specific commands
agent-browser -p ios swipe up
agent-browser -p ios swipe down 500

# Close session
agent-browser -p ios close
```

Or use environment variables:

```bash
export AGENT_BROWSER_PROVIDER=ios
export AGENT_BROWSER_IOS_DEVICE="iPhone 16 Pro"
agent-browser open https://example.com
```

| Variable | Description |
|----------|-------------|
| `AGENT_BROWSER_PROVIDER` | Set to `ios` to enable iOS mode |
| `AGENT_BROWSER_IOS_DEVICE` | Device name (e.g., "iPhone 16 Pro", "iPad Pro") |
| `AGENT_BROWSER_IOS_UDID` | Device UDID (alternative to device name) |

**Supported devices:** All iOS Simulators available in Xcode (iPhones, iPads), plus real iOS devices.

**Note:** The iOS provider boots the simulator, starts Appium, and controls Safari. First launch takes ~30-60 seconds; subsequent commands are fast.

#### Real Device Support

Appium also supports real iOS devices connected via USB. This requires additional one-time setup:

**1. Get your device UDID:**
```bash
xcrun xctrace list devices
# or
system_profiler SPUSBDataType | grep -A 5 "iPhone\|iPad"
```

**2. Sign WebDriverAgent (one-time):**
```bash
# Open the WebDriverAgent Xcode project
cd ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent
open WebDriverAgent.xcodeproj
```

In Xcode:
- Select the `WebDriverAgentRunner` target
- Go to Signing & Capabilities
- Select your Team (requires an Apple Developer account; the free tier works)
- Let Xcode manage signing automatically

**3. Use with agent-browser:**
```bash
# Connect device via USB, then:
agent-browser -p ios --device "<DEVICE_UDID>" open https://example.com

# Or use the device name if unique
agent-browser -p ios --device "John's iPhone" open https://example.com
```

**Real device notes:**
- First run installs WebDriverAgent on the device (you may need to accept a Trust prompt on the device)
- Device must be unlocked and connected via USB
- Slightly slower initial connection than simulator
- Tests against real Safari performance and behavior

### Browserbase

[Browserbase](https://browserbase.com) provides remote browser infrastructure that makes it easy to deploy browsing agents. Use it when running the agent-browser CLI in an environment where a local browser isn't feasible.
Expand Down
49 changes: 49 additions & 0 deletions cli/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ pub fn parse_command(args: &[String], flags: &Flags) -> Result<Value, ParseError
nav_cmd["headers"] = headers;
}
}
// Include iOS device info if specified (needed for auto-launch with existing daemon)
if flags.provider.as_deref() == Some("ios") {
if let Some(ref device) = flags.device {
nav_cmd["iosDevice"] = json!(device);
}
}
Ok(nav_cmd)
}
"back" => Ok(json!({ "id": id, "action": "back" })),
Expand Down Expand Up @@ -835,6 +841,48 @@ pub fn parse_command(args: &[String], flags: &Flags) -> Result<Value, ParseError
}
}

// === iOS-specific commands ===
"tap" => {
    // Touch-oriented counterpart to click: takes one selector and is
    // forwarded as its own `tap` action.
    let selector = match rest.first() {
        Some(s) => s,
        None => {
            return Err(ParseError::MissingArguments {
                context: "tap".to_string(),
                usage: "tap <selector>",
            });
        }
    };
    Ok(json!({ "id": id, "action": "tap", "selector": selector }))
}
"swipe" => {
    // `swipe <direction> [distance]`
    //
    // `direction` is required and must be one of up/down/left/right.
    // `distance` is optional; when present it must be a non-negative
    // integer that fits in a `u32`. A malformed distance is reported as an
    // error rather than silently dropped, so a typo such as `swipe down 5OO`
    // no longer falls back to the default distance without telling the user.
    const SWIPE_USAGE: &str = "swipe <up|down|left|right> [distance]";

    let direction = rest.first().ok_or_else(|| ParseError::MissingArguments {
        context: "swipe".to_string(),
        usage: SWIPE_USAGE,
    })?;
    if !matches!(*direction, "up" | "down" | "left" | "right") {
        return Err(ParseError::InvalidValue {
            message: format!("Invalid swipe direction: {}", direction),
            usage: SWIPE_USAGE,
        });
    }

    let mut swipe_cmd = json!({ "id": id, "action": "swipe", "direction": direction });
    if let Some(raw_distance) = rest.get(1) {
        let distance: u32 = raw_distance.parse().map_err(|_| ParseError::InvalidValue {
            message: format!("Invalid swipe distance: {}", raw_distance),
            usage: SWIPE_USAGE,
        })?;
        // Indexing a JSON object by key inserts the entry, so no unwrap is needed.
        swipe_cmd["distance"] = json!(distance);
    }
    Ok(swipe_cmd)
}
"device" => match rest.first().copied() {
    // `list` is the only subcommand; a bare `device` defaults to it.
    None | Some("list") => Ok(json!({ "id": id, "action": "device_list" })),
    Some(other) => Err(ParseError::UnknownSubcommand {
        subcommand: other.to_string(),
        valid_options: &["list"],
    }),
},

_ => Err(ParseError::UnknownCommand {
command: cmd.to_string(),
}),
Expand Down Expand Up @@ -1376,6 +1424,7 @@ mod tests {
user_agent: None,
provider: None,
ignore_https_errors: false,
device: None,
}
}

Expand Down
18 changes: 18 additions & 0 deletions cli/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ pub fn ensure_daemon(
ignore_https_errors: bool,
profile: Option<&str>,
state: Option<&str>,
provider: Option<&str>,
device: Option<&str>,
) -> Result<DaemonResult, String> {
// Check if daemon is running AND responsive
if is_daemon_running(session) && daemon_ready(session) {
Expand Down Expand Up @@ -343,6 +345,14 @@ pub fn ensure_daemon(
cmd.env("AGENT_BROWSER_STATE", st);
}

if let Some(p) = provider {
cmd.env("AGENT_BROWSER_PROVIDER", p);
}

if let Some(d) = device {
cmd.env("AGENT_BROWSER_IOS_DEVICE", d);
}

// Create new process group and session to fully detach
unsafe {
cmd.pre_exec(|| {
Expand Down Expand Up @@ -410,6 +420,14 @@ pub fn ensure_daemon(
cmd.env("AGENT_BROWSER_STATE", st);
}

if let Some(p) = provider {
cmd.env("AGENT_BROWSER_PROVIDER", p);
}

if let Some(d) = device {
cmd.env("AGENT_BROWSER_IOS_DEVICE", d);
}

// CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS
const CREATE_NEW_PROCESS_GROUP: u32 = 0x00000200;
const DETACHED_PROCESS: u32 = 0x00000008;
Expand Down
9 changes: 9 additions & 0 deletions cli/src/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub struct Flags {
pub user_agent: Option<String>,
pub provider: Option<String>,
pub ignore_https_errors: bool,
pub device: Option<String>,
}

pub fn parse_flags(args: &[String]) -> Flags {
Expand Down Expand Up @@ -49,6 +50,7 @@ pub fn parse_flags(args: &[String]) -> Flags {
user_agent: env::var("AGENT_BROWSER_USER_AGENT").ok(),
provider: env::var("AGENT_BROWSER_PROVIDER").ok(),
ignore_https_errors: false,
device: env::var("AGENT_BROWSER_IOS_DEVICE").ok(),
};

let mut i = 0;
Expand Down Expand Up @@ -131,6 +133,12 @@ pub fn parse_flags(args: &[String]) -> Flags {
}
}
"--ignore-https-errors" => flags.ignore_https_errors = true,
"--device" => {
if let Some(d) = args.get(i + 1) {
flags.device = Some(d.clone());
i += 1;
}
}
_ => {}
}
i += 1;
Expand Down Expand Up @@ -165,6 +173,7 @@ pub fn clean_args(args: &[String]) -> Vec<String> {
"--user-agent",
"-p",
"--provider",
"--device",
];

for arg in args.iter() {
Expand Down
2 changes: 2 additions & 0 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ fn main() {
flags.ignore_https_errors,
flags.profile.as_deref(),
flags.state.as_deref(),
flags.provider.as_deref(),
flags.device.as_deref(),
) {
Ok(result) => result,
Err(e) => {
Expand Down
Loading