From 0e2fc2f2dd45904238ecf61d2e8c006c96dfba82 Mon Sep 17 00:00:00 2001 From: PSNAppZ Date: Thu, 29 Jun 2017 13:37:46 +0530 Subject: [PATCH 1/4] Major Update 1.0.0 --- modules/getweblinks.py | 18 ++++++++++++------ torBot.py | 24 +++++++++++++++++++----- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/modules/getweblinks.py b/modules/getweblinks.py index 50965591..a6df0f2b 100644 --- a/modules/getweblinks.py +++ b/modules/getweblinks.py @@ -6,18 +6,24 @@ import bs4 """Get all onion links from the website""" -def getLinks(soup): +def getLinks(soup,ext): _soup_instance = bs4.BeautifulSoup - extensions = ['.onion','.onion/'] + extensions = [] + if ext: + for e in ext: + extensions.append(e) if isinstance(type(soup), type(_soup_instance)): websites = [] for link in soup.find_all('a'): web_link = link.get('href') if web_link != None: - if 'http' in web_link: - for extension in extensions: - if web_link.endswith(extension): - websites.append(web_link) + if ('http' in web_link or 'https' in web_link): + if ext: + for exten in extensions: + if web_link.endswith(exten): + websites.append(web_link) + else: + websites.append(web_link) else: pass """Pretty print output as below""" diff --git a/torBot.py b/torBot.py index 62f31e5f..c665f6e7 100644 --- a/torBot.py +++ b/torBot.py @@ -63,7 +63,7 @@ def header(): print( " / /_/ __ \/ __ \/ /_ ____/_ __/ ") print( " / __/ / / / /_/ / __ \/ __ \/ / ") print( " / /_/ /_/ / _, _/ /_/ / /_/ / / ") - print( " \__/\____/_/ |_/_.___/\____/_/ V 0.0.3") + print( " \__/\____/_/ |_/_.___/\____/_/ V 1.0.0") print(bcolors.FAIL+bcolors.On_Black) print("#######################################################") print("# TorBot - A python Tor Crawler #") @@ -74,12 +74,26 @@ def header(): def main(): - header() + parser = argparse.ArgumentParser() + parser.add_argument("-q","--quiet",action="store_true") + parser.add_argument("-u","--url",help="Specifiy a website link to crawl") + parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites.") + parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)") + args = parser.parse_args() + if args.quiet == 0: + header() print ("Tor Ip Address :") + link = args.url + ext = 0 + ext = args.extension a = readPage("https://check.torproject.org/",1) - b = readPage("http://torlinkbgs6aabns.onion/") - getMails(b) - getLinks(b) + if link: + b = readPage(link) + else: + b = readPage("http://torlinkbgs6aabns.onion/") + if args.mail: + getMails(b) + getLinks(b,ext) print ("\n\n") return 0 From f62775c540ee52ed4070bec88d80ef0a2bc3492f Mon Sep 17 00:00:00 2001 From: PSNAppZ Date: Thu, 29 Jun 2017 15:06:58 +0530 Subject: [PATCH 2/4] Fixed Unittest --- modules/getweblinks.py | 13 +++++++++---- tests/test_getweblinks.py | 7 ++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/modules/getweblinks.py b/modules/getweblinks.py index a6df0f2b..4e882f61 100644 --- a/modules/getweblinks.py +++ b/modules/getweblinks.py @@ -31,10 +31,15 @@ def getLinks(soup,ext): print (bcolors.OKGREEN+'Websites Found - '+bcolors.ENDC+str(len(websites))) print ('-------------------------------') for web in websites: - if (urllib.request.urlopen(web).getcode() == 200): - print (web) - else : - print(bcolors.On_Red+web +bcolors.ENDC) + flag=1 + try: + urllib.request.urlopen(web) + except urllib.error.HTTPError as e: + if e.code: + print(bcolors.On_Red+web+bcolors.ENDC) + flag=0 + if flag: + print(web) return websites else: raise('Method parameter is not of instance bs4.BeautifulSoup') diff --git a/tests/test_getweblinks.py b/tests/test_getweblinks.py index 68265789..c0df6c20 100644 --- a/tests/test_getweblinks.py +++ b/tests/test_getweblinks.py @@ -13,12 +13,13 @@ class getLinksTestCase(unittest.TestCase): def setUp(self): self.held, sys.stdout = sys.stdout, StringIO() + self.maxDiff=None def test_print_links(self): #data = "\nWebsites Found - 7\n-------------------------------\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIP856a6b4\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIPbf5d683\nhttp://aff.ironsocket.com/SH7L\nhttp://aff.ironsocket.com/SH7L\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIPdb5f512\nhttp://wsrs.net/\nhttp://cmsgear.com/\n" - data = "\n"+bcolors.OKGREEN+"Websites Found - "+bcolors.ENDC+"0\n-------------------------------\n" - - getweblinks.getLinks(soup) + data = "\n"+bcolors.OKGREEN+"Websites Found - "+bcolors.ENDC+"1\n-------------------------------\nhttp://cmsgear.com/\n" + ext = ['.com/'] + getweblinks.getLinks(soup,ext) self.assertEqual(sys.stdout.getvalue(),data) From e1dd8a749b6dc3429db5aa0c2321fc36de44bb6b Mon Sep 17 00:00:00 2001 From: PSNAppZ Date: Thu, 29 Jun 2017 15:51:37 +0530 Subject: [PATCH 3/4] Added live website checker optional with exceptions --- modules/getweblinks.py | 28 ++++++++++++++++++---------- torBot.py | 7 +++++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/modules/getweblinks.py b/modules/getweblinks.py index 4e882f61..29309543 100644 --- a/modules/getweblinks.py +++ b/modules/getweblinks.py @@ -6,7 +6,7 @@ import bs4 """Get all onion links from the website""" -def getLinks(soup,ext): +def getLinks(soup,ext,live): _soup_instance = bs4.BeautifulSoup extensions = [] if ext: @@ -30,16 +30,24 @@ def getLinks(soup,ext): print ('') print (bcolors.OKGREEN+'Websites Found - '+bcolors.ENDC+str(len(websites))) print ('-------------------------------') - for web in websites: - flag=1 - try: - urllib.request.urlopen(web) - except urllib.error.HTTPError as e: - if e.code: + if live: + for web in websites: + flag=1 + try: + urllib.request.urlopen(web) + except urllib.error.HTTPError as e: + if e.code: + print(bcolors.On_Red+web+bcolors.ENDC) + flag=0 + except urllib.error.URLError as e: print(bcolors.On_Red+web+bcolors.ENDC) flag=0 - if flag: - print(web) - return websites + + if flag: + print(web) + else: + for web in websites: + print(web) + return websites else: raise('Method parameter is not of instance bs4.BeautifulSoup') diff --git a/torBot.py b/torBot.py index c665f6e7..46dfecf1 100644 --- a/torBot.py +++ b/torBot.py @@ -77,14 +77,17 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-q","--quiet",action="store_true") parser.add_argument("-u","--url",help="Specifiy a website link to crawl") - parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites.") + parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites") parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)") + parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)") args = parser.parse_args() if args.quiet == 0: header() print ("Tor Ip Address :") link = args.url ext = 0 + live = 0 + live = args.live ext = args.extension a = readPage("https://check.torproject.org/",1) if link: @@ -93,7 +96,7 @@ def main(): b = readPage("http://torlinkbgs6aabns.onion/") if args.mail: getMails(b) - getLinks(b,ext) + getLinks(b,ext,live) print ("\n\n") return 0 From f2cba79cdc46b2ae441ab5d6dd93c9e8f5dc61ef Mon Sep 17 00:00:00 2001 From: PSNAppZ Date: Thu, 29 Jun 2017 16:08:43 +0530 Subject: [PATCH 4/4] Added live parameter default value --- modules/getweblinks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/getweblinks.py b/modules/getweblinks.py index 29309543..f2bf953b 100644 --- a/modules/getweblinks.py +++ b/modules/getweblinks.py @@ -6,7 +6,7 @@ import bs4 """Get all onion links from the website""" -def getLinks(soup,ext,live): +def getLinks(soup,ext,live=0): _soup_instance = bs4.BeautifulSoup extensions = [] if ext: