Skip to content

Commit

Permalink
Add headless browser to the WebSurferAgent, closes microsoft#1481
Browse files Browse the repository at this point in the history
  • Loading branch information
vijaykramesh committed Feb 6, 2024
1 parent 26daa18 commit b4ec0de
Show file tree
Hide file tree
Showing 11 changed files with 649 additions and 129 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
run: |
python -m pip install --upgrade pip wheel
pip install -e .
pip install -e .[test,websurfer]
python -c "import autogen"
pip install pytest mock
- name: Test with pytest skipping openai tests
Expand Down
28 changes: 17 additions & 11 deletions autogen/agentchat/contrib/web_surfer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import json
import copy
import copy
import logging
import re
from dataclasses import dataclass
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, GroupChatManager, GroupChat, OpenAIWrapper
from autogen.browser_utils import SimpleTextBrowser
from autogen.code_utils import content_str
from datetime import datetime
from autogen.token_count_utils import count_token, get_max_token_limit
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple

from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, OpenAIWrapper
from autogen.browser_utils import SimpleTextBrowser, HeadlessChromeBrowser
from autogen.oai.openai_utils import filter_config
from autogen.token_count_utils import count_token, get_max_token_limit

logger = logging.getLogger(__name__)


class WebSurferAgent(ConversableAgent):
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages."""
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages.
Defaults to a simple text-based browser.
Can be configured to use a headless Chrome browser by providing a browser_config dictionary with the key "headless" set to True.
"""

DEFAULT_PROMPT = (
"You are a helpful AI assistant with access to a web browser (via the provided functions). In fact, YOU ARE THE ONLY MEMBER OF YOUR PARTY WITH ACCESS TO A WEB BROWSER, so please help out where you can by performing web searches, navigating pages, and reporting what you find. Today's date is "
Expand Down Expand Up @@ -84,7 +86,11 @@ def __init__(
if browser_config is None:
self.browser = SimpleTextBrowser()
else:
self.browser = SimpleTextBrowser(**browser_config)
headless = browser_config.pop("headless", False)
if headless:
self.browser = HeadlessChromeBrowser(**browser_config)
else:
self.browser = SimpleTextBrowser(**browser_config)

# Create a copy of the llm_config for the inner monologue agents to use, and set them up with function calling
if llm_config is None: # Nothing to copy
Expand Down Expand Up @@ -214,7 +220,7 @@ def _browser_state():
current_page = self.browser.viewport_current_page
total_pages = len(self.browser.viewport_pages)

header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n"
header += f"Viewport position: Showing page {current_page + 1} of {total_pages}.\n"
return (header, self.browser.viewport)

def _informational_search(query):
Expand All @@ -225,7 +231,7 @@ def _informational_search(query):
def _navigational_search(query):
self.browser.visit_page(f"bing: {query}")

# Extract the first linl
# Extract the first link
m = re.search(r"\[.*?\]\((http.*?)\)", self.browser.page_content)
if m:
self.browser.visit_page(m.group(1))
Expand Down
7 changes: 7 additions & 0 deletions autogen/browser_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .simple_text_browser import SimpleTextBrowser
from .headless_chrome_browser import HeadlessChromeBrowser

# Names exported by `from autogen.browser_utils import *`: the two browser
# implementations imported at the top of this package module.
__all__ = (
"SimpleTextBrowser",
"HeadlessChromeBrowser",
)
48 changes: 48 additions & 0 deletions autogen/browser_utils/abstract_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from abc import ABC, abstractmethod
from typing import Optional, Union, Dict


class AbstractBrowser(ABC):
    """An abstract class for a web browser.

    Concrete browsers provide navigation (`set_address`, `visit_page`) and a paged
    "viewport" over the content of the current page (`viewport`, `page_down`, `page_up`).

    NOTE(review): WebSurferAgent also reads `viewport_current_page` and `viewport_pages`
    from its browser; those attributes are not declared by this interface, so every
    implementation is presumably expected to provide them -- confirm.
    """

    @abstractmethod
    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[str] = None,
        bing_api_key: Optional[str] = None,
        request_kwargs: Optional[Dict] = None,
    ):
        """Configure the browser.

        Args:
            start_page: Address the browser starts on.
            viewport_size: Size of a single viewport page; the unit is defined by the implementation.
            downloads_folder: Optional folder for downloaded files.
            bing_api_key: Optional Bing API key for implementations that search through the API.
            request_kwargs: Optional extra keyword arguments for the implementation's HTTP layer.
        """

    @property
    @abstractmethod
    def address(self) -> str:
        """The address of the page currently loaded."""

    @abstractmethod
    def set_address(self, uri_or_path) -> None:
        """Point the browser at `uri_or_path`."""

    @property
    @abstractmethod
    def viewport(self) -> str:
        """The content of the viewport page currently selected."""

    @property
    @abstractmethod
    def page_content(self) -> str:
        """The full content of the current page."""

    @abstractmethod
    def page_down(self) -> None:
        """Move the viewport one page down."""

    @abstractmethod
    def page_up(self) -> None:
        """Move the viewport one page up."""

    @abstractmethod
    def visit_page(self, path_or_uri) -> str:
        """Navigate to `path_or_uri` and return the content of the viewport."""
147 changes: 147 additions & 0 deletions autogen/browser_utils/headless_chrome_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import re

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from autogen.browser_utils.abstract_browser import AbstractBrowser

# Optional PDF support
IS_PDF_CAPABLE = False
try:
import pdfminer
import pdfminer.high_level

IS_PDF_CAPABLE = True
except ModuleNotFoundError:
pass

# Other optional dependencies
try:
import pathvalidate
except ModuleNotFoundError:
pass

from typing import Optional, Union, Dict


class HeadlessChromeBrowser(AbstractBrowser):
    """(In preview) A Selenium powered headless Chrome browser. Suitable for Agentic use.

    Pages are loaded in a headless Chrome instance, reduced to plain text, and exposed
    through a paged "viewport": for http(s) pages the text is cut into consecutive slices
    of at most `viewport_size` characters; any other address is shown as a single page.
    """

    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[str] = None,
        bing_api_key: Optional[str] = None,
        request_kwargs: Optional[Dict] = None,
    ):
        """Store the configuration and launch the headless Chrome driver.

        Args:
            start_page: Address loaded as soon as the driver starts.
            viewport_size: Maximum number of characters of page text per viewport page.
            downloads_folder: Stored on the instance; not otherwise used by this class.
            bing_api_key: Stored on the instance; not otherwise used by this class
                (searches are performed through the Bing web page).
            request_kwargs: Stored on the instance; not otherwise used by this class.
        """
        self.start_page = start_page
        self.driver = None
        self.viewport_size = viewport_size  # Applies only to the standard uri types
        self.downloads_folder = downloads_folder
        self.history = []
        self.page_title = None
        self.viewport_current_page = 0
        self.viewport_pages = []
        self.bing_api_key = bing_api_key
        self.request_kwargs = request_kwargs
        self._page_content = ""

        self._start_browser()

    def _start_browser(self):
        """Launch Chrome in headless mode and load the start page."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.get(self.start_page)

    def close(self) -> None:
        """Quit the Chrome driver, if one is running, so the browser process is released."""
        if self.driver is not None:
            self.driver.quit()
            self.driver = None

    @property
    def address(self) -> str:
        """The URL currently loaded in the driver."""
        return self.driver.current_url

    def set_address(self, uri_or_path):
        """Navigate to `uri_or_path`; addresses of the form `bing: <query>` run a Bing search instead."""
        # Record every requested address; previously `history` was created but never filled.
        self.history.append(uri_or_path)
        # A newly loaded document must start at its first viewport page, not wherever
        # the previous document had been scrolled to.
        self.viewport_current_page = 0

        if uri_or_path.startswith("bing:"):
            self._bing_search(uri_or_path[len("bing:") :].strip())
        else:
            self.driver.get(uri_or_path)

    @property
    def viewport(self) -> str:
        """Return the content of the current viewport."""
        if not self.viewport_pages:
            return ""
        bounds = self.viewport_pages[self.viewport_current_page]
        return self._page_content[bounds[0] : bounds[1]]

    @property
    def page_content(self) -> str:
        """Return the full contents of the current page."""
        return self._page_content

    def _set_page_content(self, content) -> None:
        """Sets the text content of the current page and recomputes the viewport pages."""
        self._page_content = content
        self._split_pages()
        # Keep the current page inside the new page list; never let it drop below 0.
        last_page = len(self.viewport_pages) - 1
        self.viewport_current_page = max(0, min(self.viewport_current_page, last_page))

    def _split_pages(self) -> None:
        """Recompute `viewport_pages` as (start, end) character bounds into the page content."""
        content_length = len(self._page_content)
        page_size = self.viewport_size

        # Only regular web pages are split. Everything else (e.g. about:blank) becomes a
        # single page so that bounds from a previously visited page never linger. The same
        # single-page fallback covers empty content and a missing or non-positive
        # viewport size, which would otherwise never advance through the content.
        is_web_page = self.address.startswith(("http:", "https:"))
        if not is_web_page or content_length == 0 or not page_size or page_size <= 0:
            self.viewport_pages = [(0, content_length)]
            return

        # Break the content into consecutive pages of at most `page_size` characters
        self.viewport_pages = [
            (start, min(start + page_size, content_length)) for start in range(0, content_length, page_size)
        ]

    def _process_html(self, html: str) -> str:
        """Process the raw HTML content and return the processed text."""
        soup = BeautifulSoup(html, "html.parser")

        # Remove javascript and style blocks
        for element in soup(["script", "style"]):
            element.extract()

        # Convert to text
        text = soup.get_text()

        # Remove excessive blank lines
        text = re.sub(r"\n{2,}", "\n\n", text).strip()

        return text

    def _bing_search(self, query):
        """Open bing.com in the driver and submit `query` through the search box."""
        self.driver.get("https://www.bing.com")

        search_bar = self.driver.find_element(By.NAME, "q")
        search_bar.clear()
        search_bar.send_keys(query)
        search_bar.submit()

    def page_down(self):
        """Move the viewport one page down."""
        if self.viewport_current_page < len(self.viewport_pages) - 1:
            self.viewport_current_page += 1

    def page_up(self):
        """Move the viewport one page up."""
        if self.viewport_current_page > 0:
            self.viewport_current_page -= 1

    def visit_page(self, path_or_uri):
        """Update the address, visit the page, and return the content of the viewport."""
        self.set_address(path_or_uri)
        # Documents without a <body> (e.g. raw XML/SVG) have a null document.body;
        # treat them as empty instead of failing inside the injected script.
        html = self.driver.execute_script('return document.body ? document.body.innerHTML : "";')
        self._set_page_content(self._process_html(html or ""))
        return self.viewport
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import json
import io
import mimetypes
import os
import requests
import re
import markdownify
import io
import uuid
import mimetypes
from urllib.parse import urljoin, urlparse

import markdownify
import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple

from autogen.browser_utils.abstract_browser import AbstractBrowser

# Optional PDF support
IS_PDF_CAPABLE = False
Expand All @@ -27,8 +27,10 @@
except ModuleNotFoundError:
pass

from typing import Optional, Union, Dict


class SimpleTextBrowser:
class SimpleTextBrowser(AbstractBrowser):
"""(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"teachable": ["chromadb"],
"lmm": ["replicate", "pillow"],
"graphs": ["networkx~=3.2.1", "matplotlib~=3.8.1"],
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"],
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate", "selenium"],
"redis": ["redis"],
},
classifiers=[
Expand Down
Loading

0 comments on commit b4ec0de

Please sign in to comment.