diff --git a/modules/__init__.py b/modules/__init__.py
index d54c28b0..eadbd6aa 100644
--- a/modules/__init__.py
+++ b/modules/__init__.py
@@ -3,5 +3,7 @@
 from .getweblinks import *
 from .pagereader import *
 from .updater import *
+from .savefile import *
-__all__ = (bcolors.__all__ + getemails.__all__ + getweblinks.__all__ + pagereader.__all__ + updater.__all__)
+
+__all__ = (bcolors.__all__ + getemails.__all__ + getweblinks.__all__ + pagereader.__all__ + updater.__all__ + savefile.__all__)
diff --git a/modules/getemails.py b/modules/getemails.py
index 5fcc138e..eb07ab25 100644
--- a/modules/getemails.py
+++ b/modules/getemails.py
@@ -2,13 +2,14 @@ import os
 sys.path.append(os.path.abspath('../'))
 from modules.bcolors import Bcolors
+from modules.savefile import saveJson
 import bs4
 
 __all__ = ['getMails']
 
 """Get all emails from the website"""
-def getMails(soup):
+def getMails(soup,save=0):
     _soup_instance = bs4.BeautifulSoup
     if isinstance(type(soup), type(_soup_instance)):
         emails = []
@@ -27,6 +28,8 @@ def getMails(soup):
         print ('-------------------------------')
         for mail in emails:
             print (mail)
+        if save:
+            saveJson("Extracted-Mail-IDs",emails)
         return ''
     else:
         raise(Bcolors.FAIL+'Method parameter is not of instance bs4.BeautifulSoup'+Bcolors.ENDC)
diff --git a/modules/getweblinks.py b/modules/getweblinks.py
index 8a9e9574..ce65b5f4 100644
--- a/modules/getweblinks.py
+++ b/modules/getweblinks.py
@@ -1,6 +1,7 @@
 import sys
 import os
 sys.path.append(os.path.abspath('../'))
+from modules.savefile import saveJson
 import urllib.request
 from modules.bcolors import Bcolors
 import bs4
@@ -10,11 +11,14 @@ __all__ = ['getLinks']
 
 
-def link_status(web):
+
+def link_status(web,out_queue,index):
     link_live = False
+    out_queue[index] = web + " is_live = False "
     try:
         urllib.request.urlopen(web)
         link_live = True
+        out_queue[index] = web + " is_live = True "
         print(web)
     except urllib.error.HTTPError as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
@@ -22,11 +26,11 @@ def link_status(web):
         print(Bcolors.On_Red+web+Bcolors.ENDC)
     except http.client.RemoteDisconnected as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
-    return
+    return
 
 """Get all onion links from the website"""
-def getLinks(soup,ext,live=0):
+def getLinks(soup,ext,live=0,save=0):
     _soup_instance = bs4.BeautifulSoup
     extensions = []
     if ext:
@@ -53,13 +57,23 @@ def getLinks(soup,ext,live=0):
         print ('-------------------------------')
         if live:
             threads = []
+            result = [{} for x in websites]
             for web in websites:
-                t = threading.Thread(target=link_status, args=(web,))
-                threads.append(t)
+                t = threading.Thread(target=link_status, args=(web,result,websites.index(web)))
                 t.start()
+                threads.append(t)
+            try:
+                for t in threads:
+                    t.join()
+                if save:
+                    saveJson("Live-Onion-Links",result)
+            except:
+                pass
         else:
             for web in websites:
                 print(web)
+        if save:
+            saveJson("Onion-Links",websites)
         return websites
     else:
         raise('Method parameter is not of instance bs4.BeautifulSoup')
diff --git a/modules/savefile.py b/modules/savefile.py
new file mode 100644
index 00000000..82c295d2
--- /dev/null
+++ b/modules/savefile.py
@@ -0,0 +1,21 @@
+import json
+import time
+
+__all__ = ['saveJson']
+
+# Append results to a timestamped "TorBoT-Export-*.json" file ("a" = append mode)
+def saveJson(datatype,data):
+    """Dump data to a timestamped TorBoT-Export JSON file."""
+    timestr = time.strftime("%Y%m%d-%H%M%S")
+    # JSON file creation
+    file = open("TorBoT-Export-"+datatype+timestr+".json", "a")
+    # Store data in JSON format
+    output = {datatype : data}
+    # Dump output to file
+    json.dump(output, file, indent=2)
+    file.close()
+    print("\nData saved to file: "+"TorBoT-Export-"+datatype+timestr+".json")
with a File Name :"+ "TorBoT-Export-"+datatype+timestr+".json") + return + + + diff --git a/torBot.py b/torBot.py index f2abacf6..a28be0a8 100644 --- a/torBot.py +++ b/torBot.py @@ -14,12 +14,12 @@ from stem.control import Controller with Controller.from_port(port = 9051) as controller: - controller.authenticate("16:872860B76453A77D60CA2BB8C1A7042072093276A3D701AD684053EC4C") + controller.authenticate("16:3BEA46EB6C489B90608A65120BD7CF0C7BA709513AB8ACF212B9537183") controller.signal(Signal.NEWNYM) #TorBot VERSION _VERSION_ = "1.0.1" #TOR SETUP GLOBAL Vars -SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to +SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to socks.set_default_proxy(socks.SOCKS5, "127.0.0.1",SOCKS_PORT) socket.socket = socks.socksocket # Perform DNS resolution through the socket @@ -83,6 +83,7 @@ def main(): parser.add_argument("--update",action="store_true",help="Update TorBot to the latest stable version") parser.add_argument("-q","--quiet",action="store_true") parser.add_argument("-u","--url",help="Specifiy a website link to crawl") + parser.add_argument("-s","--save",action="store_true", help="Save results in a file") parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites") parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)") parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)") @@ -100,16 +101,18 @@ def main(): link = args.url ext = 0 live = 0 + save=0 live = args.live ext = args.extension + save = args.save a = readPage("https://check.torproject.org/",1) if link: b = readPage(link) else: b = readPage("http://torlinkbgs6aabns.onion/") if args.mail: - getMails(b) - getLinks(b,ext,live) + getMails(b,save) + getLinks(b,ext,live,save) print ("\n\n") return 0