tools.py

import json
import os

import requests
from agency_swarm.tools import BaseTool
from bs4 import BeautifulSoup
from pydantic import Field

from utils import load_config

# Load settings from config.yaml (expected to provide SERPER_DEV_API_KEY).
load_config(file_path="./config.yaml")


class SearchEngine(BaseTool):
    """
    SearchEngine: A search engine tool. Use this tool to run a query against a
    search engine (Google, via the Serper.dev API). The output is a formatted
    string listing the title, link, and snippet of each organic result.
    """

    search_engine_query: str = Field(
        ..., description="Search engine query to be executed by the tool"
    )

    def format_results(self, organic_results):
        result_strings = []
        for result in organic_results:
            title = result.get('title', 'No Title')
            link = result.get('link', '#')
            snippet = result.get('snippet', 'No snippet available.')
            result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---")
        return '\n'.join(result_strings)

    def run(self):
        search_url = "https://google.serper.dev/search"

        # Attempt to make the HTTP POST request
        try:
            headers = {
                'Content-Type': 'application/json',
                # Raises KeyError (caught below) if the API key is not set.
                'X-API-KEY': os.environ['SERPER_DEV_API_KEY'],
            }
            payload = json.dumps({"q": self.search_engine_query})

            response = requests.post(search_url, headers=headers, data=payload, timeout=15)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4XX, 5XX)
            results = response.json()

            # Check if 'organic' results are in the response
            if 'organic' in results:
                return self.format_results(results['organic'])
            return "No organic results found."
        except requests.exceptions.HTTPError as http_err:
            return f"HTTP error occurred: {http_err}"
        except requests.exceptions.RequestException as req_err:
            return f"Request exception occurred: {req_err}"
        except KeyError as key_err:
            return f"Key error (missing API key or unexpected response): {key_err}"
class ScrapeWebsite(BaseTool):
    """
    ScrapeWebsite: A website scraping tool. Use this tool to scrape the text
    content of a website. You must provide the URL of the website to scrape.
    The output is a dictionary mapping the URL to the extracted text.
    """

    website_url: str = Field(
        ..., description="The URL of the website to scrape the content from."
    )

    def run(self):
        # Browser-like headers reduce the chance of being blocked by the site.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.google.com/',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Accept-Encoding': 'gzip, deflate, br'
        }

        try:
            # Making a GET request to the website
            response = requests.get(self.website_url, headers=headers, timeout=15)
            response.raise_for_status()  # This will raise an exception for HTTP errors

            # Parsing the page content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n')

            # Cleaning up the text: removing blank lines and excess whitespace
            clean_text = '\n'.join(line.strip() for line in text.splitlines() if line.strip())

            print(f"Successfully scraped content from {self.website_url}")
            return {self.website_url: clean_text}
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving content from {self.website_url}: {e}")
            return {self.website_url: f"Failed to retrieve content due to an error: {e}"}
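
# A minimal usage sketch (an assumption, not part of the original file): in the
# agency_swarm BaseTool pattern, fields are supplied at construction and run()
# executes the tool. Requires SERPER_DEV_API_KEY to be set (e.g. via config.yaml).
if __name__ == "__main__":
    search = SearchEngine(search_engine_query="open-source agent frameworks")
    print(search.run())

    scraper = ScrapeWebsite(website_url="https://example.com")
    print(scraper.run())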