
Commit 9118ca0

Merge pull request #31 from y-mehta/dev
Feature_Save_To_File
2 parents 14665ae + 8631ae7

5 files changed: +54 −11 lines changed

modules/__init__.py

+3 −1

@@ -3,5 +3,7 @@
 from .getweblinks import *
 from .pagereader import *
 from .updater import *
+from .savefile import *
 
-__all__ = (bcolors.__all__ + getemails.__all__ + getweblinks.__all__ + pagereader.__all__ + updater.__all__)
+
+__all__ = (bcolors.__all__ + getemails.__all__ + getweblinks.__all__ + pagereader.__all__ + updater.__all__ + savefile.__all__ )
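Because savefile is star-imported here and appended to __all__, the new helper is reachable from the package root. A minimal sanity check, assuming the modules package is on sys.path:

    from modules import saveJson  # re-exported via modules/__init__.py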

modules/getemails.py

+4 −1

@@ -2,13 +2,14 @@
 import os
 sys.path.append(os.path.abspath('../'))
 from modules.bcolors import Bcolors
+from modules.savefile import saveJson
 import bs4
 
 __all__ = ['getMails']
 
 """Get all emails from the website"""
 
-def getMails(soup):
+def getMails(soup,save=0):
     _soup_instance = bs4.BeautifulSoup
     if isinstance(type(soup), type(_soup_instance)):
         emails = []
@@ -27,6 +28,8 @@ def getMails(soup):
         print ('-------------------------------')
         for mail in emails:
             print (mail)
+        if save:
+            saveJson("Extracted-Mail-IDs",emails)
         return ''
     else:
         raise(Bcolors.FAIL+'Method parameter is not of instance bs4.BeautifulSoup'+Bcolors.ENDC)
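With the new save flag, getMails dumps the collected addresses to a timestamped JSON file via saveJson in addition to printing them. A minimal call-site sketch, assuming the modules package is importable (the HTML snippet is made up; in TorBot the soup comes from pagereader):

    import bs4
    from modules.getemails import getMails

    # Hypothetical page content standing in for a crawled site.
    soup = bs4.BeautifulSoup('<a href="mailto:admin@example.onion">contact</a>', 'html.parser')
    getMails(soup)          # print only, as before
    getMails(soup, save=1)  # also writes TorBoT-Export-Extracted-Mail-IDs<timestamp>.json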

modules/getweblinks.py

+19 −5

@@ -1,6 +1,7 @@
 import sys
 import os
 sys.path.append(os.path.abspath('../'))
+from modules.savefile import saveJson
 import urllib.request
 from modules.bcolors import Bcolors
 import bs4
@@ -10,23 +11,26 @@
 
 __all__ = ['getLinks']
 
-def link_status(web):
+
+def link_status(web,out_queue,index):
     link_live = False
+    out_queue[index] = web + " is_live = False "
     try:
         urllib.request.urlopen(web)
         link_live = True
+        out_queue[index] = web + " is_live = True "
         print(web)
     except urllib.error.HTTPError as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
     except urllib.error.URLError as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
     except http.client.RemoteDisconnected as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
-    return
+    return
 
 
 """Get all onion links from the website"""
-def getLinks(soup,ext,live=0):
+def getLinks(soup,ext,live=0,save=0):
     _soup_instance = bs4.BeautifulSoup
     extensions = []
     if ext:
@@ -53,13 +57,23 @@ def getLinks(soup,ext,live=0):
         print ('-------------------------------')
         if live:
             threads = []
+            result = [{} for x in websites]
             for web in websites:
-                t = threading.Thread(target=link_status, args=(web,))
-                threads.append(t)
+                t = threading.Thread(target=link_status, args=(web,result,websites.index(web)))
                 t.start()
+                threads.append(t)
+            try:
+                for t in threads:
+                    t.join()
+                if save:
+                    saveJson("Live-Onion-Links",result)
+            except:
+                pass
         else:
             for web in websites:
                 print(web)
+            if save:
+                saveJson("Onion-Links",websites)
         return websites
     else:
         raise('Method parameter is not of instance bs4.BeautifulSoup')
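link_status now reports into a pre-sized shared list rather than only printing, with one slot per thread; since each worker writes only its own index, no lock is needed. A standalone sketch of the same pattern (the URLs are placeholders, and it uses enumerate instead of the committed websites.index(web), which would misbehave on duplicate URLs):

    import threading
    import urllib.request

    def link_status(web, out_queue, index):
        # Pessimistic default; overwritten only if the request succeeds.
        out_queue[index] = web + " is_live = False "
        try:
            urllib.request.urlopen(web)
            out_queue[index] = web + " is_live = True "
        except Exception:
            pass

    websites = ["https://example.com", "http://no-such-host.invalid"]
    result = [{} for _ in websites]  # one slot per link, as in the commit
    threads = []
    for i, web in enumerate(websites):
        t = threading.Thread(target=link_status, args=(web, result, i))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    print(result)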

modules/savefile.py

+21

@@ -0,0 +1,21 @@
+import json
+import time
+
+__all__ = ['saveJson']
+
+# open the file "TorBoT-Export" in write ("a") mode
+def saveJson(datatype,data):
+    "function_docstring"
+    timestr = time.strftime("%Y%m%d-%H%M%S")
+    #Json File Creation
+    file = open("TorBoT-Export-"+datatype+timestr+".json", "a")
+    #Store data in Json format
+    output = {datatype : data}
+    #Dump output to file
+    json.dump(output, file, indent=2)
+    file.close()
+    print("\nData will be saved with a File Name :"+ "TorBoT-Export-"+datatype+timestr+".json")
+    return
+
+
+
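saveJson wraps json.dump with a timestamped, append-mode file handle, so repeated exports land in distinct files rather than overwriting each other. A quick usage sketch, assuming the modules package is importable (the data is made up):

    from modules.savefile import saveJson

    # Writes TorBoT-Export-Demo<YYYYmmdd-HHMMSS>.json to the working directory.
    saveJson("Demo", ["http://example.onion", "http://another.onion"])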

torBot.py

+7 −4

@@ -14,12 +14,12 @@
 from stem.control import Controller
 
 with Controller.from_port(port = 9051) as controller:
-    controller.authenticate("16:872860B76453A77D60CA2BB8C1A7042072093276A3D701AD684053EC4C")
+    controller.authenticate("16:3BEA46EB6C489B90608A65120BD7CF0C7BA709513AB8ACF212B9537183")
     controller.signal(Signal.NEWNYM)
 #TorBot VERSION
 _VERSION_ = "1.0.1"
 #TOR SETUP GLOBAL Vars
-SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to
+SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to
 socks.set_default_proxy(socks.SOCKS5, "127.0.0.1",SOCKS_PORT)
 socket.socket = socks.socksocket
 # Perform DNS resolution through the socket
@@ -83,6 +83,7 @@ def main():
     parser.add_argument("--update",action="store_true",help="Update TorBot to the latest stable version")
     parser.add_argument("-q","--quiet",action="store_true")
     parser.add_argument("-u","--url",help="Specifiy a website link to crawl")
+    parser.add_argument("-s","--save",action="store_true", help="Save results in a file")
     parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites")
     parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)")
     parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)")
@@ -100,16 +101,18 @@ def main():
     link = args.url
     ext = 0
     live = 0
+    save=0
     live = args.live
     ext = args.extension
+    save = args.save
     a = readPage("https://check.torproject.org/",1)
     if link:
         b = readPage(link)
     else:
         b = readPage("http://torlinkbgs6aabns.onion/")
     if args.mail:
-        getMails(b)
-    getLinks(b,ext,live)
+        getMails(b,save)
+    getLinks(b,ext,live,save)
     print ("\n\n")
     return 0
 
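With -s/--save wired through main(), persisting results becomes opt-in from the command line. For example (the URL is illustrative):

    python torBot.py -u http://example.onion -m -s   # crawl, extract mails, save links and mails as JSON
    python torBot.py -l -s                           # live-check the default site's links and save the results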
