# rootVIII | proxy_crawler.py
from os import path
from pathlib import Path
from sys import exit

from utils.web_crawler import ProxyCrawler
from utils.utils import get_cli_args


def main(url: str, keyword: str, is_headless: bool):
    # Load one user-agent string per line, skipping blank lines.
    project_path = str(Path(__file__).parent)
    with open(path.join(project_path, 'user-agents.txt')) as file_in:
        user_agents = [line.strip() for line in file_in if line.strip()]
    if not user_agents:
        raise RuntimeError('missing user-agents in user-agents.txt')
    # Re-create the crawler and restart the search each time it returns.
    while True:
        bot = ProxyCrawler(url, keyword, is_headless, user_agents)
        print('searching for %s keyword(s): %s' % (url, keyword))
        bot.start_search()


if __name__ == "__main__":
    args = get_cli_args()
    if args.url[:8] != 'https://':
        print('include protocol in URL: https://')
        exit(1)
    try:
        main(args.url, args.keyword, args.headless)
    except KeyboardInterrupt:
        print('exiting...\n')
        exit()
    except Exception as err:
        print('encountered error, exiting...')
        print(err)
        exit(1)
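
The command-line interface comes from utils/utils.get_cli_args, which is not part of this file. Based only on how the parsed arguments are used above (args.url, args.keyword, args.headless), a minimal sketch of what it might provide, assuming argparse and hypothetical flag names, could look like this:

# Hypothetical sketch of utils/utils.get_cli_args; the real
# implementation lives in utils/utils.py and is not shown here.
# The flag names -u/--url, -k/--keyword, and --headless are
# assumptions inferred from how the parsed args are used in
# proxy_crawler.py.
from argparse import ArgumentParser


def get_cli_args():
    parser = ArgumentParser(
        description='Search a site for keywords through rotating proxies')
    parser.add_argument('-u', '--url', required=True,
                        help='target URL, including the https:// protocol')
    parser.add_argument('-k', '--keyword', required=True,
                        help='keyword(s) to search the target site for')
    parser.add_argument('--headless', action='store_true',
                        help='run the browser in headless mode')
    return parser.parse_args()

With those assumed flags, an invocation might look like: python3 proxy_crawler.py -u https://example.com -k somekeyword --headless. The user-agents.txt file next to the script is expected to contain one user-agent string per line; blank lines are ignored, and the script raises an error if no user agents are found.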