Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 15 additions & 51 deletions examples/operator-example.ts
Original file line number Diff line number Diff line change
@@ -1,83 +1,47 @@
import { Stagehand } from "@/dist";
import { LogLine, Stagehand } from "@/dist";
import dotenv from "dotenv";
import StagehandConfig from "@/stagehand.config";
import chalk from "chalk";

// Load environment variables
dotenv.config();

// Natural-language task passed to `agent.execute` as its `instruction` in `main`.
const INSTRUCTION =
"Go to Google Japan and interact with it in Japanese. Tell me (in English) an authentic recipe that I can make with ingredients found in American grocery stores.";

/**
 * Example entry point: constructs and initializes a Stagehand instance, runs
 * agent instructions against the browser page, prints the results, and closes
 * the browser in `finally` regardless of whether execution succeeded.
 *
 * NOTE(review): as rendered here, this body interleaves two revisions of the
 * example — two banner `console.log` calls, two `const agent` declarations in
 * the same `try` scope, and both `instruction,` and `instruction: INSTRUCTION,`
 * in one options object. Presumably a diff-view artifact; confirm against the
 * real file before running.
 */
async function main() {
console.log(`\n${chalk.bold("Stagehand 🤘 Native Agent Example")}\n`);
console.log(`\n${chalk.bold("Stagehand 🤘 Operator Example")}\n`);

// Initialize Stagehand
console.log(`${chalk.cyan("→")} Initializing Stagehand...`);
const stagehand = new Stagehand({
...StagehandConfig,
// Custom logger: prints only the level, message and timestamp of each log line.
logger: ({ level, message, timestamp }: LogLine) => {
console.log({ level, message, timestamp });
},
});

// init() runs before the try block below, so a failure here is not handled
// by this function's catch/finally and will reject the promise returned by main().
await stagehand.init();
console.log(`${chalk.green("✓")} Stagehand initialized`);

try {
const page = stagehand.page;

console.log(`\n${chalk.magenta.bold("⚡ First Agent Execution")}`);

const agent = stagehand.agent({
instructions: `You are a helpful assistant that can use a web browser.
You are currently on the following page: ${page.url()}.
Do not ask follow up questions, the user will trust your judgement.`,
});

console.log(`${chalk.yellow("→")} Navigating to Google...`);
await stagehand.page.goto("https://www.google.com");
console.log(`${chalk.green("✓")} Loaded: ${chalk.dim(page.url())}`);

// Execute the agent again with a different instruction
const firstInstruction =
"Search for openai news on google and extract the name of the first 3 results";
console.log(
`${chalk.cyan("↳")} Instruction: ${chalk.white(firstInstruction)}`,
);

const result1 = await agent.execute(firstInstruction);

console.log(`${chalk.green("✓")} Execution complete`);
console.log(`${chalk.yellow("⤷")} Result:`);
console.log(chalk.white(JSON.stringify(result1, null, 2)));

console.log(`\n${chalk.magenta.bold("⚡ Second Agent Execution")}`);

console.log(`\n${chalk.yellow("→")} Navigating to Apple...`);
await page.goto("https://www.apple.com/shop/buy-mac/macbook-air");
console.log(`${chalk.green("✓")} Loaded: ${chalk.dim(page.url())}`);
// NOTE(review): second `const agent` in the same scope as the one above.
const agent = stagehand.agent();

const instruction =
"Add a macbook air to the cart. Choose the most expensive configuration.";
console.log(`${chalk.cyan("↳")} Instruction: ${chalk.white(instruction)}`);
// Execute the agent
console.log(`${chalk.cyan("↳")} Instruction: ${INSTRUCTION}`);

// Options form of execute(): the instruction plus a cap of 20 steps.
const result = await agent.execute({
instruction,
instruction: INSTRUCTION,
maxSteps: 20,
});

console.log(`${chalk.green("✓")} Execution complete`);
console.log(`${chalk.yellow("⤷")} Result:`);
console.log(chalk.white(JSON.stringify(result, null, 2)));
console.log(JSON.stringify(result, null, 2));
console.log(chalk.white(result.message));
} catch (error) {
console.log(`${chalk.red("✗")} Error: ${error}`);
// Print the stack minus its first line, which presumably repeats the
// message already printed above.
if (error instanceof Error && error.stack) {
console.log(chalk.dim(error.stack.split("\n").slice(1).join("\n")));
}
} finally {
// Close the browser
console.log(`\n${chalk.yellow("→")} Closing browser...`);
await stagehand.close();
console.log(`${chalk.green("✓")} Browser closed\n`);
}
}

// NOTE(review): as rendered here, main() is invoked twice — once with a
// rejection handler and once bare. Presumably a diff-view artifact of two
// revisions; confirm which single call the real file contains.
main().catch((error) => {
console.log(`${chalk.red("✗")} Unhandled error in main function`);
console.log(chalk.red(error));
});
// Bare call: a rejection from this invocation has no handler attached.
main();
126 changes: 79 additions & 47 deletions lib/handlers/operatorHandler.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
import {
AgentAction,
AgentExecuteOptions,
AgentResult,
ActionExecutionResult,
} from "@/types/agent";
import { AgentAction, AgentExecuteOptions, AgentResult } from "@/types/agent";
import { LogLine } from "@/types/log";
import { OperatorResponse, operatorResponseSchema } from "@/types/operator";
import {
OperatorResponse,
operatorResponseSchema,
OperatorSummary,
operatorSummarySchema,
} from "@/types/operator";
import { LLMParsedResponse } from "../inference";
import { ChatMessage, LLMClient } from "../llm/LLMClient";
import { buildOperatorSystemPrompt } from "../prompt";
import { StagehandPage } from "../StagehandPage";
import { ObserveResult } from "@/types/stagehand";

export class StagehandOperatorHandler {
private stagehandPage: StagehandPage;
private logger: (message: LogLine) => void;
private llmClient: LLMClient;
private messages: ChatMessage[];
private lastActionResult: ActionExecutionResult | null = null;
private lastMethod: string | null = null;

constructor(
stagehandPage: StagehandPage,
Expand Down Expand Up @@ -66,21 +65,18 @@ export class StagehandOperatorHandler {

let messageText = `Here is a screenshot of the current page (URL: ${url}):`;

if (this.lastMethod && this.lastActionResult) {
const statusMessage = this.lastActionResult.success
? "was successful"
: `failed with error: ${this.lastActionResult.error}`;

messageText = `Previous action '${this.lastMethod}' ${statusMessage}.\n\n${messageText}`;

if (
this.lastMethod === "extract" &&
this.lastActionResult.success &&
this.lastActionResult.data
) {
messageText = `Previous extraction result: ${JSON.stringify(this.lastActionResult.data, null, 2)}\n\n${messageText}`;
}
}
// Prefix the screenshot prompt with a one-line summary of every action taken
// so far, so the model sees the full history on each step.
messageText = `Previous actions were: ${actions
  .map((action) => {
    let result = "";
    if (action.type === "act") {
      const args = action.playwrightArguments as ObserveResult;
      result = `Performed a "${args.method}" action ${args.arguments.length > 0 ? `with arguments: ${args.arguments.map((arg) => `"${arg}"`).join(", ")}` : ""} on "${args.description}"`;
    } else if (action.type === "extract") {
      // The extraction result is typed `unknown` and may be an object;
      // interpolating it directly would render as "[object Object]", so
      // serialize it explicitly.
      result = `Extracted data: ${JSON.stringify(action.extractionResult)}`;
    }
    return `[${action.type}] ${action.reasoning}. Result: ${result}`;
  })
  .join("\n")}\n\n${messageText}`;

this.messages.push({
role: "user",
Expand All @@ -103,33 +99,36 @@ export class StagehandOperatorHandler {
completed = true;
}

let playwrightArguments: ObserveResult | undefined;
if (result.method === "act") {
[playwrightArguments] = await this.stagehandPage.page.observe(
result.parameters,
);
}
let extractionResult: unknown | undefined;
if (result.method === "extract") {
extractionResult = await this.stagehandPage.page.extract(
result.parameters,
);
}

await this.executeAction(result, playwrightArguments, extractionResult);

actions.push({
type: result.method,
reasoning: result.reasoning,
taskCompleted: result.taskComplete,
parameters: result.parameters,
playwrightArguments,
extractionResult,
});

currentStep++;

try {
const actionResult = await this.executeAction(result);
this.lastActionResult = {
success: true,
data: actionResult,
};
} catch (error) {
this.lastActionResult = {
success: false,
error: error instanceof Error ? error.message : String(error),
};
}

this.lastMethod = result.method;
}

return {
success: true,
message: actions[actions.length - 1].reasoning as string,
message: await this.getSummary(options.instruction),
actions,
completed: actions[actions.length - 1].taskCompleted as boolean,
};
Expand All @@ -152,7 +151,38 @@ export class StagehandOperatorHandler {
return response;
}

private async executeAction(action: OperatorResponse): Promise<unknown> {
/**
 * Asks the LLM for a final answer to the original instruction.
 *
 * Sends the accumulated conversation plus one extra user message (built in a
 * new array, so `this.messages` is not modified) and requests a structured
 * response matching `operatorSummarySchema`.
 *
 * @param goal - The original instruction the operator was asked to carry out.
 * @returns The `answer` field of the parsed summary response.
 */
private async getSummary(goal: string): Promise<string> {
  const summaryRequest = `Now use the steps taken to answer the original instruction of ${goal}.`;

  const responseModel = {
    name: "operatorSummarySchema",
    schema: operatorSummarySchema,
  };

  const completion = (await this.llmClient.createChatCompletion<OperatorSummary>(
    {
      options: {
        messages: [
          ...this.messages,
          {
            role: "user",
            content: [{ type: "text", text: summaryRequest }],
          },
        ],
        response_model: responseModel,
        requestId: "operator-summary",
      },
      logger: this.logger,
    },
  )) as LLMParsedResponse<OperatorSummary>;

  return completion.data.answer;
}
private async executeAction(
action: OperatorResponse,
playwrightArguments?: ObserveResult,
extractionResult?: unknown,
): Promise<unknown> {
const { method, parameters } = action;
const page = this.stagehandPage.page;

Expand All @@ -162,14 +192,16 @@ export class StagehandOperatorHandler {

switch (method) {
case "act":
await page.act({
action: parameters,
slowDomBasedAct: false,
timeoutMs: 5000,
});
if (!playwrightArguments) {
throw new Error("No playwright arguments provided");
}
await page.act(playwrightArguments);
break;
case "extract":
return await page.extract(parameters);
if (!extractionResult) {
throw new Error("No extraction result provided");
}
return extractionResult;
case "goto":
await page.goto(parameters, { waitUntil: "load" });
break;
Expand Down
6 changes: 5 additions & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,11 @@ export class Stagehand {
{
modelName: options.model,
clientOptions: options.options,
userProvidedInstructions: options.instructions,
userProvidedInstructions:
options.instructions ??
`You are a helpful assistant that can use a web browser.
You are currently on the following page: ${this.stagehandPage.page.url()}.
Do not ask follow up questions, the user will trust your judgement.`,
agentType: options.provider,
},
);
Expand Down
6 changes: 6 additions & 0 deletions types/operator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ export const operatorResponseSchema = z.object({
});

export type OperatorResponse = z.infer<typeof operatorResponseSchema>;

/**
 * Zod schema for the operator's final summary: an object with a single
 * string field, `answer`.
 */
export const operatorSummarySchema = z.object({
answer: z.string().describe("The final answer to the original instruction."),
});

/** Static type inferred from `operatorSummarySchema`. */
export type OperatorSummary = z.infer<typeof operatorSummarySchema>;