Skip to content

Commit db79a1b

Browse files
committed
Add headless browser to the WebSurferAgent, closes #1481
1 parent 26daa18 commit db79a1b

File tree

9 files changed

+490
-51
lines changed

9 files changed

+490
-51
lines changed

autogen/agentchat/contrib/web_surfer.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
1-
import json
1+
import copy
22
import copy
33
import logging
44
import re
5-
from dataclasses import dataclass
6-
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
7-
from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, GroupChatManager, GroupChat, OpenAIWrapper
8-
from autogen.browser_utils import SimpleTextBrowser
9-
from autogen.code_utils import content_str
105
from datetime import datetime
11-
from autogen.token_count_utils import count_token, get_max_token_limit
6+
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
7+
8+
from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, OpenAIWrapper
9+
from autogen.browser_utils import SimpleTextBrowser, HeadlessChromeBrowser
1210
from autogen.oai.openai_utils import filter_config
11+
from autogen.token_count_utils import count_token, get_max_token_limit
1312

1413
logger = logging.getLogger(__name__)
1514

1615

1716
class WebSurferAgent(ConversableAgent):
18-
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages."""
17+
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages.
18+
Defaults to a simple text-based browser.
19+
Can be configured to use a headless Chrome browser by providing a browser_config dictionary with the key "headless" set to True.
20+
"""
1921

2022
DEFAULT_PROMPT = (
2123
"You are a helpful AI assistant with access to a web browser (via the provided functions). In fact, YOU ARE THE ONLY MEMBER OF YOUR PARTY WITH ACCESS TO A WEB BROWSER, so please help out where you can by performing web searches, navigating pages, and reporting what you find. Today's date is "
@@ -84,7 +86,11 @@ def __init__(
8486
if browser_config is None:
    self.browser = SimpleTextBrowser()
else:
    # Work on a shallow copy so the caller's dictionary is left untouched;
    # the "headless" switch is consumed here and must not be forwarded to
    # the browser constructors, which do not accept it.
    browser_kwargs = dict(browser_config)

    # Default to the simple text browser when "headless" is absent, so that
    # configurations written before this option existed keep working
    # instead of raising KeyError.
    headless = browser_kwargs.pop("headless", False)

    browser_cls = HeadlessChromeBrowser if headless else SimpleTextBrowser
    self.browser = browser_cls(**browser_kwargs)
8894

8995
# Create a copy of the llm_config for the inner monologue agents to use, and set them up with function calling
9096
if llm_config is None: # Nothing to copy
@@ -214,7 +220,7 @@ def _browser_state():
214220
current_page = self.browser.viewport_current_page
215221
total_pages = len(self.browser.viewport_pages)
216222

217-
header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n"
223+
header += f"Viewport position: Showing page {current_page + 1} of {total_pages}.\n"
218224
return (header, self.browser.viewport)
219225

220226
def _informational_search(query):
@@ -225,7 +231,7 @@ def _informational_search(query):
225231
def _navigational_search(query):
226232
self.browser.visit_page(f"bing: {query}")
227233

228-
# Extract the first linl
234+
# Extract the first link
229235
m = re.search(r"\[.*?\]\((http.*?)\)", self.browser.page_content)
230236
if m:
231237
self.browser.visit_page(m.group(1))

autogen/browser_utils/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .simple_text_browser import SimpleTextBrowser
2+
from .headless_chrome_browser import HeadlessChromeBrowser
3+
4+
__all__ = (
5+
"SimpleTextBrowser",
6+
"HeadlessChromeBrowser",
7+
)
autogen/browser_utils/abstract_browser.py

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Optional, Union, Dict
3+
4+
5+
class AbstractBrowser(ABC):
    """An abstract class for a web browser.

    Declares the members every concrete browser must provide. A subclass
    can only be instantiated once it overrides all of them, including
    ``__init__``.
    """

    @abstractmethod
    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[str] = None,
        bing_api_key: Optional[str] = None,
        request_kwargs: Optional[Dict] = None,
    ):
        """Set up the browser.

        Args:
            start_page: Address the browser starts on.
            viewport_size: Size of one viewport; how it is interpreted is up
                to the concrete browser.
            downloads_folder: Optional folder for downloads.
            bing_api_key: Optional Bing API key.
            request_kwargs: Optional extra keyword arguments for requests.
        """

    @property
    @abstractmethod
    def address(self) -> str:
        """Return the address of the current page."""

    @abstractmethod
    def set_address(self, uri_or_path):
        """Point the browser at ``uri_or_path``."""

    @property
    @abstractmethod
    def viewport(self) -> str:
        """Return the content of the current viewport."""

    @property
    @abstractmethod
    def page_content(self) -> str:
        """Return the content of the current page."""

    @abstractmethod
    def page_down(self):
        """Move the viewport down."""

    @abstractmethod
    def page_up(self):
        """Move the viewport up."""

    @abstractmethod
    def visit_page(self, path_or_uri):
        """Navigate to ``path_or_uri``."""
autogen/browser_utils/headless_chrome_browser.py

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import re
2+
3+
from bs4 import BeautifulSoup
4+
from selenium import webdriver
5+
from selenium.webdriver.chrome.options import Options
6+
from selenium.webdriver.common.by import By
7+
8+
from autogen.browser_utils.abstract_browser import AbstractBrowser
9+
10+
# Optional PDF support
11+
IS_PDF_CAPABLE = False
12+
try:
13+
import pdfminer
14+
import pdfminer.high_level
15+
16+
IS_PDF_CAPABLE = True
17+
except ModuleNotFoundError:
18+
pass
19+
20+
# Other optional dependencies
21+
try:
22+
import pathvalidate
23+
except ModuleNotFoundError:
24+
pass
25+
26+
from typing import Optional, Union, Dict
27+
28+
29+
class HeadlessChromeBrowser(AbstractBrowser):
    """(In preview) A Selenium powered headless Chrome browser. Suitable for Agentic use.

    The browser owns a Chrome WebDriver session. Call ``close()`` (or use the
    instance as a context manager) when done so the Chrome process is shut
    down rather than left running.

    NOTE(review): ``history``, ``page_title``, ``viewport_current_page`` and
    ``viewport_pages`` are initialised but never updated by this class, and
    ``viewport_size``, ``downloads_folder``, ``bing_api_key`` and
    ``request_kwargs`` are stored but not used. Callers that compute
    ``len(viewport_pages)`` will see 0 -- confirm whether that is intended.
    """

    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[str] = None,
        bing_api_key: Optional[str] = None,
        request_kwargs: Optional[Dict] = None,
    ):
        """Store the configuration and launch headless Chrome on ``start_page``.

        Args:
            start_page: Address loaded right after the browser starts.
            viewport_size: Stored on the instance; not used by this class.
            downloads_folder: Stored on the instance; not used by this class.
            bing_api_key: Stored on the instance; not used by this class.
            request_kwargs: Stored on the instance; not used by this class.
        """
        self.start_page = start_page
        self.driver = None
        self.viewport_size = viewport_size  # Applies only to the standard uri types
        self.downloads_folder = downloads_folder
        self.history = list()
        self.page_title = None
        self.viewport_current_page = 0
        self.viewport_pages = list()
        self.bing_api_key = bing_api_key
        self.request_kwargs = request_kwargs

        self._start_browser()

    def _start_browser(self):
        """Launch headless Chrome and load the start page."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.get(self.start_page)
        except Exception:
            # Chrome is already running at this point; shut it down before
            # propagating so a failed start does not leak the process.
            self.close()
            raise

    def close(self):
        """Quit the WebDriver session, if one is open. Safe to call repeatedly."""
        driver, self.driver = self.driver, None
        if driver is not None:
            driver.quit()

    def __enter__(self):
        """Return the browser itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on leaving the ``with`` block; exceptions propagate."""
        self.close()

    @property
    def address(self) -> str:
        """Return the URL the driver is currently on."""
        return self.driver.current_url

    def set_address(self, uri_or_path):
        """Navigate to ``uri_or_path``.

        Addresses starting with ``bing:`` run a Bing search for the text after
        the prefix; anything else is handed to the driver as-is.
        """
        if uri_or_path.startswith("bing:"):
            self._bing_search(uri_or_path[len("bing:"):].strip())
        else:
            self.driver.get(uri_or_path)

    @property
    def viewport(self) -> str:
        """Return the content of the current viewport.

        This implementation does not paginate: it returns the full page text.
        """
        return self.page_content

    @property
    def page_content(self) -> str:
        """Return the text of the current page's body, with markup stripped."""
        # document.body can be null (e.g. before a body exists), in which
        # case reading innerHTML would raise inside the page; fall back to "".
        html = self.driver.execute_script("return document.body ? document.body.innerHTML : '';")
        return self._process_html(html or "")

    def _process_html(self, html: str) -> str:
        """Process the raw HTML content and return the processed text."""
        soup = BeautifulSoup(html, "html.parser")

        # Remove javascript and style blocks
        for script in soup(["script", "style"]):
            script.extract()

        # Convert to text
        text = soup.get_text()

        # Remove excessive blank lines
        text = re.sub(r"\n{2,}", "\n\n", text).strip()

        return text

    def _bing_search(self, query):
        """Open bing.com and submit ``query`` through the search box."""
        self.driver.get("https://www.bing.com")

        search_bar = self.driver.find_element(By.NAME, "q")
        search_bar.clear()
        search_bar.send_keys(query)
        search_bar.submit()

    def page_down(self):
        """Scroll to the bottom of the page."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    def page_up(self):
        """Scroll to the top of the page."""
        self.driver.execute_script("window.scrollTo(0, 0);")

    def visit_page(self, path_or_uri):
        """Navigate to ``path_or_uri``; equivalent to ``set_address``."""
        self.set_address(path_or_uri)

autogen/browser_utils.py renamed to autogen/browser_utils/simple_text_browser.py

+21-19
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
import json
1+
import io
2+
import mimetypes
23
import os
3-
import requests
44
import re
5-
import markdownify
6-
import io
75
import uuid
8-
import mimetypes
96
from urllib.parse import urljoin, urlparse
7+
8+
import markdownify
9+
import requests
1010
from bs4 import BeautifulSoup
11-
from dataclasses import dataclass
12-
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
11+
12+
from autogen.browser_utils.abstract_browser import AbstractBrowser
1313

1414
# Optional PDF support
1515
IS_PDF_CAPABLE = False
@@ -27,17 +27,19 @@
2727
except ModuleNotFoundError:
2828
pass
2929

30+
from typing import Optional, Union, Dict
31+
3032

31-
class SimpleTextBrowser:
33+
class SimpleTextBrowser(AbstractBrowser):
3234
"""(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""
3335

3436
def __init__(
35-
self,
36-
start_page: Optional[str] = "about:blank",
37-
viewport_size: Optional[int] = 1024 * 8,
38-
downloads_folder: Optional[Union[str, None]] = None,
39-
bing_api_key: Optional[Union[str, None]] = None,
40-
request_kwargs: Optional[Union[Dict, None]] = None,
37+
self,
38+
start_page: Optional[str] = "about:blank",
39+
viewport_size: Optional[int] = 1024 * 8,
40+
downloads_folder: Optional[Union[str, None]] = None,
41+
bing_api_key: Optional[Union[str, None]] = None,
42+
request_kwargs: Optional[Union[Dict, None]] = None,
4143
):
4244
self.start_page = start_page
4345
self.viewport_size = viewport_size # Applies only to the standard uri types
@@ -64,7 +66,7 @@ def set_address(self, uri_or_path):
6466
if uri_or_path == "about:blank":
6567
self._set_page_content("")
6668
elif uri_or_path.startswith("bing:"):
67-
self._bing_search(uri_or_path[len("bing:") :].strip())
69+
self._bing_search(uri_or_path[len("bing:"):].strip())
6870
else:
6971
if not uri_or_path.startswith("http:") and not uri_or_path.startswith("https:"):
7072
uri_or_path = urljoin(self.address, uri_or_path)
@@ -77,7 +79,7 @@ def set_address(self, uri_or_path):
7779
def viewport(self) -> str:
7880
"""Return the content of the current viewport."""
7981
bounds = self.viewport_pages[self.viewport_current_page]
80-
return self.page_content[bounds[0] : bounds[1]]
82+
return self.page_content[bounds[0]: bounds[1]]
8183

8284
@property
8385
def page_content(self) -> str:
@@ -175,8 +177,8 @@ def _bing_search(self, query):
175177
self.page_title = f"{query} - Search"
176178

177179
content = (
178-
f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
179-
+ "\n\n".join(web_snippets)
180+
f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
181+
+ "\n\n".join(web_snippets)
180182
)
181183
if len(news_snippets) > 0:
182184
content += "\n\n## News Results:\n" + "\n\n".join(news_snippets)
@@ -223,7 +225,7 @@ def _fetch_page(self, url):
223225
if title_elm and len(title_elm) > 0:
224226
main_title = title_elm.string
225227
webpage_text = (
226-
"# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
228+
"# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
227229
)
228230
else:
229231
webpage_text = markdownify.MarkdownConverter().convert_soup(soup)

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
"teachable": ["chromadb"],
5353
"lmm": ["replicate", "pillow"],
5454
"graphs": ["networkx~=3.2.1", "matplotlib~=3.8.1"],
55-
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"],
55+
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate", "selenium"],
5656
"redis": ["redis"],
5757
},
5858
classifiers=[

0 commit comments

Comments
 (0)