Skip to content

Commit

Permalink
Issue #62: Report to file (store lines in UrlsInfo).
Browse files: browse the repository at this point in the history
  • Loading branch information
Nekmo committed Oct 9, 2019
1 parent f22593c commit a31b487
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
8 changes: 5 additions & 3 deletions dirhunt/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@


class Crawler(ThreadPoolExecutor):
urls_info = None

def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None):
Expand Down Expand Up @@ -169,8 +171,8 @@ def print_urls_info(self):
self.echo(r'No interesting files detected ¯\_(ツ)_/¯')
return
self.echo('━' * get_terminal_size()[0])
UrlsInfo(self.index_of_processors, self.sessions, self.std, self._max_workers, self.progress_enabled,
self.timeout).start()
self.urls_info = UrlsInfo(self.index_of_processors, self.sessions, self.std, self._max_workers,
self.progress_enabled, self.timeout, bool(self.to_file)).start()

def restart(self):
try:
Expand All @@ -195,5 +197,5 @@ def json(self):
'index_of_processors': self.index_of_processors,
'processing': self.processing,
'processed': self.processed,
# TODO: self.results ?
# TODO: self.urls_info.lines
}
7 changes: 6 additions & 1 deletion dirhunt/url_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ class UrlsInfo(Pool):
count = 0
current = 0

def __init__(self, processors, sessions, std=None, max_workers=None, progress_enabled=True, timeout=10):
def __init__(self, processors, sessions, std=None, max_workers=None, progress_enabled=True, timeout=10,
save_info=False):
super(UrlsInfo, self).__init__(max_workers)
self.lock = Lock()
self.processors = processors
Expand All @@ -157,6 +158,8 @@ def __init__(self, processors, sessions, std=None, max_workers=None, progress_en
self.spinner = random_spinner()
self.progress_enabled = progress_enabled
self.timeout = timeout
self.lines = []
self.save_info = save_info

def callback(self, url_len, extra_len, file):
line = None
Expand All @@ -168,6 +171,8 @@ def callback(self, url_len, extra_len, file):
self.error_files += 1
self.lock.acquire()
self.erase()
if self.save_info:
self.lines.append(line)
if line:
self.echo(line)
self.print_progress()
Expand Down

0 comments on commit a31b487

Please sign in to comment.