Skip to content

Commit 04d9709

Browse files
authored
Agnext websurfer (#205)
* Initial work on multimodal websurfer * A little more progress. * Getting function calling to work. * Some basic progress with navigation. * Added ability to print multimodal messages to console. * Fixed hatch error * Nicely print multimodal messages to console. * Got OCR working. * Fixed the click action. * Solved some hatch errors. * Fixed some formatting errors. * Fixed more type errors. * Yet more fixes to types. * Fixed many type errors. * Fixed all type errors. Some needed to be ignored. See todos. * Fixed all? hatch errors? * Fixed multiline aria-names in prompts.
1 parent 5996b45 commit 04d9709

File tree

9 files changed

+1687
-3
lines changed

9 files changed

+1687
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import asyncio
2+
import logging
3+
4+
from agnext.application import SingleThreadedAgentRuntime
5+
from agnext.application.logging import EVENT_LOGGER_NAME
6+
from team_one.agents.multimodal_web_surfer import MultimodalWebSurfer
7+
from team_one.agents.orchestrator import RoundRobinOrchestrator
8+
from team_one.agents.user_proxy import UserProxy
9+
from team_one.messages import RequestReplyMessage
10+
from team_one.utils import LogHandler, create_completion_client_from_env
11+
12+
# NOTE: Before running this example, install the browser with: playwright install --with-deps chromium
13+
14+
15+
async def main() -> None:
    """Wire up a WebSurfer/UserProxy team and run it until the runtime is idle.

    Registers a ``MultimodalWebSurfer``, a ``UserProxy`` and a
    ``RoundRobinOrchestrator`` over both on a ``SingleThreadedAgentRuntime``,
    calls the surfer's ``init`` with the completion client and the
    ``"chromium"`` browser channel, then sends a ``RequestReplyMessage`` to the
    user proxy and waits for the runtime to stop when idle.
    """
    # The runtime that hosts every agent below.
    runtime = SingleThreadedAgentRuntime()

    # Completion client handed to the web surfer during init.
    model_client = create_completion_client_from_env()

    # Agents are registered through zero-argument factories; the returned
    # proxies are what the orchestrator is given.
    surfer_proxy = runtime.register_and_get_proxy("WebSurfer", lambda: MultimodalWebSurfer())
    human_proxy = runtime.register_and_get_proxy("UserProxy", lambda: UserProxy())
    runtime.register(
        "orchestrator",
        lambda: RoundRobinOrchestrator([surfer_proxy, human_proxy]),
    )

    run_handle = runtime.start()

    # Look up the concrete agent instance for the proxy's id via the runtime's
    # private accessor (hence the type-ignore), so that init() can be called.
    surfer_agent = runtime._get_agent(surfer_proxy.id)  # type: ignore
    assert isinstance(surfer_agent, MultimodalWebSurfer)
    await surfer_agent.init(model_client=model_client, browser_channel="chromium")

    # Start the exchange by sending a reply request to the user proxy.
    kickoff = RequestReplyMessage()
    await runtime.send_message(kickoff, human_proxy.id)
    await run_handle.stop_when_idle()
43+
44+
45+
if __name__ == "__main__":
    # Send the runtime's event log at INFO level through a single LogHandler,
    # replacing any handlers already attached to that logger.
    event_logger = logging.getLogger(EVENT_LOGGER_NAME)
    event_logger.setLevel(logging.INFO)
    event_logger.handlers = [LogHandler()]

    asyncio.run(main())

python/teams/team-one/pyproject.toml

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ classifiers = [
1717
]
1818
dependencies = [
1919
"agnext@{root:parent:parent:uri}",
20-
"aiofiles"
20+
"aiofiles",
21+
"playwright"
2122
]
2223

2324
[tool.hatch.envs.default]
@@ -53,7 +54,7 @@ allow-direct-references = true
5354
[tool.ruff]
5455
line-length = 120
5556
fix = true
56-
exclude = ["build", "dist"]
57+
exclude = ["build", "dist", "page_script.js"]
5758
target-version = "py310"
5859
include = ["src/**", "examples/*.py"]
5960

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .multimodal_web_surfer import MultimodalWebSurfer
2+
3+
# Names exported by `from <package> import *`: only the re-exported MultimodalWebSurfer.
__all__ = ("MultimodalWebSurfer",)

0 commit comments

Comments
 (0)