A mission to produce a beautiful and comprehensible code base. (Documenting and Refactoring) #46

Merged · 10 commits · Jan 11, 2018
10 changes: 2 additions & 8 deletions README.md
@@ -101,14 +101,8 @@ Before you run the torBot make sure the following things are done properly:
 * Run tor service
 `sudo service tor start`

-* Set a password for tor
-`tor --hash-password "my_password"`
-
-* Give the password inside torbot.py
-`from stem.control import Controller
-with Controller.from_port(port = 9051) as controller:
-    controller.authenticate("your_password_hash")
-    controller.signal(Signal.NEWNYM)`
+* Make sure that your torrc is configured to SOCKS_PORT localhost:9050,
+  which should be the default setting.

 `python3 torBot.py`
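With the control-port password steps removed, the only remaining requirement is that Tor is listening on its default SOCKS port. As a hedged illustration of what that setting enables (not necessarily how torBot.py itself is wired), a Python script can route its traffic through localhost:9050 using the PySocks package:

```python
import socket
import urllib.request

import socks  # provided by the PySocks package

# Route every new socket through Tor's default SOCKS5 listener.
socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9050)
socket.socket = socks.socksocket

# This request now leaves through Tor instead of the local network.
print(urllib.request.urlopen("http://example.com").status)
```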
50 changes: 27 additions & 23 deletions modules/getemails.py
@@ -1,36 +1,40 @@
 from modules.bcolors import Bcolors
 from bs4 import BeautifulSoup
 from modules.savefile import saveJson


-"""Get all emails from the website"""
-def getMails(soup, save=0):
+def getMails(soup):
+    """
+    Searches <a href> tags for links, then checks whether a link contains
+    the substring 'mailto', indicating an email. If it is determined to be
+    an email, the link is split and the username is appended to the list.
+
+    Args:
+        soup: BeautifulSoup instance that will be used for parsing
+
+    Returns:
+        emails: list of email IDs
+    """
     b_colors = Bcolors()
-    _soup_instance = BeautifulSoup
-    if isinstance(type(soup), type(_soup_instance)):
+    if isinstance(type(soup), type(BeautifulSoup)):
         emails = []
-        for link in soup.find_all('a'):
-            email_link = link.get('href')
-            if email_link is not None:
-                if 'mailto' in email_link:
-                    """Split email address on"""
-                    email_addr = email_link.split(':')
-                    emails.append(email_addr[1])
-                else:
-                    pass
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if url and 'mailto' in url:
+                """Split email address on ':'"""
+                email_addr = url.split(':')
+                emails.append(email_addr[1])

         """Pretty print output as below"""
         print('')
         print(b_colors.OKGREEN + 'Mails Found - ' + b_colors.ENDC + str(len(emails)))
         print('-------------------------------')
         for mail in emails:
             print(mail)
-        if save:
-            saveJson("Extracted-Mail-IDs", emails)
-        return ''
+
+        return emails

     else:
-        msg = ''.join((b_colors.FAIL,
-                       'Method parameter is not of instance BeautifulSoup',
-                       b_colors.ENDC))
-        raise(msg)
+        raise TypeError('Method parameter is not of instance BeautifulSoup')
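One caveat on the refactored guard: `isinstance(type(soup), type(BeautifulSoup))` compares the class of `soup` against the metaclass `type`, which holds for almost any object, so the check is far looser than it looks; `isinstance(soup, BeautifulSoup)` would be the strict test. A minimal usage sketch of the new signature (the address is hypothetical):

```python
from bs4 import BeautifulSoup

from modules.getemails import getMails

# A tiny hypothetical page with a single mailto link.
html = '<a href="mailto:[email protected]">contact</a>'
soup = BeautifulSoup(html, 'html.parser')

# 'mailto:[email protected]'.split(':')[1] keeps just the address.
print(getMails(soup))  # ['[email protected]']
```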
81 changes: 57 additions & 24 deletions modules/getweblinks.py
@@ -1,42 +1,75 @@
 import urllib.request
 import re
 from modules.bcolors import Bcolors
 from bs4 import BeautifulSoup


-def link_status(web, out_queue, index):
-    b_colors = Bcolors()
-    out_queue[index] = web + " is_live = False "
-    try:
-        urllib.request.urlopen(web)
-        out_queue[index] = web + " is_live = True "
-        print(web)
-    except urllib.error.HTTPError:
-        print(b_colors.On_Red+web+b_colors.ENDC)
+def valid_onion_url(link):
+    """
+    Validates onion urls using regex.
+
+    Args:
+        link: the url to be checked
+
+    Returns:
+        bool: True/False based on link
+    """
+    pattern = r"^https?\b(://+)(.+)(.+)\bonion/(.*)"
+    re_obj = re.compile(pattern)
+    if re_obj.fullmatch(link):
+        return True
+
+    return False
+
+
+def valid_url(link):
+    """
+    Validates general urls using regex.
+
+    Takes in a string (the link) and decides the validity of the url
+    using regex.
+
+    Args:
+        link: the url to be checked
+
+    Returns:
+        bool: True/False based on link
+    """
+    pattern = r"^https?\b(://+)(.+)(.+)\b...(.*)"
+    re_obj = re.compile(pattern)
+    if re_obj.fullmatch(link):
+        return True
+
+    return False


-def getLinks(soup, ext, live=0, save=0):
-
-    """Get all onion links from the website"""
+def getLinks(soup):
+    """
+    Searches through all <a href> (hyperlink) tags, stores them in a
+    list, and validates whether each url is formatted correctly.
+
+    Args:
+        soup: BeautifulSoup instance currently being used.
+
+    Returns:
+        websites: list of websites that were found
+    """
     b_colors = Bcolors()
-    extensions = []
-    if ext:
-        for e in ext:
-            extensions.append(e)
-
     if isinstance(type(soup), type(BeautifulSoup)):
         websites = []

-        for link in soup.find_all('a'):
-            web_link = link.get('href')
-            if web_link and ('http' in web_link or 'https' in web_link):
-                for exten in extensions:
-                    if web_link.endswith(exten):
-                        websites.append(web_link)
-                    else:
-                        websites.append(web_link)
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if url and (valid_onion_url(url) or valid_url(url)):
+                websites.append(url)

         """Pretty print output as below"""
         print(''.join((b_colors.OKGREEN,
               'Websites Found - ', b_colors.ENDC, str(len(websites)))))
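To see what these validators accept, a small standalone check (the onion address is made up):

```python
import re

onion_pattern = r"^https?\b(://+)(.+)(.+)\bonion/(.*)"

# fullmatch requires the entire url to fit the pattern.
print(bool(re.fullmatch(onion_pattern, "http://abcdefgh1234567.onion/page")))  # True
print(bool(re.fullmatch(onion_pattern, "http://cmsgear.com/")))                # False
```

Note that `valid_url`'s `\b...` matches any three characters after a word boundary, so it is a loose sanity check rather than full RFC validation.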
20 changes: 13 additions & 7 deletions modules/savefile.py
@@ -3,15 +3,21 @@


 def saveJson(datatype, data):
-    "function_docstring"
+    """
+    Creates a json file and stores the json in it.
+
+    Args:
+        datatype: the type of the object being passed
+        data: data that is being stored with the object
+    """
     timestr = time.strftime("%Y%m%d-%H%M%S")
     # Json File Creation
-    file = open("TorBoT-Export-"+datatype+timestr+".json", "a")
-    # Store data in Json format
-    output = {datatype: data}
-    # Dump output to file
-    json.dump(output, file, indent=2)
-    file.close()
+    with open("TorBoT-Export-"+datatype+timestr+".json", "x") as file:
+        # Store data in Json format
+        output = {datatype: data}
+        # Dump output to file
+        json.dump(output, file, indent=2)

     print("\nData will be saved with a File Name :",
           "TorBoT-Export-"+datatype+timestr+".json")
7 changes: 7 additions & 0 deletions modules/updater.py
@@ -2,6 +2,13 @@


 def updateTor():
+    """
+    Currently updates Tor by calling terminal commands using subprocess.
+    Not a great method and will be replaced in the future.
+    """
     print("Checking for latest stable release")
     isGit = subprocess.Popen(
         ["git", "branch"],
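The hunk is cut off here, but the shape is visible: `updateTor` shells out to `git` through `subprocess`. A hedged sketch of that detection idiom (not the exact body of `updater.py`):

```python
import subprocess

# 'git branch' succeeds inside a checkout and fails elsewhere
# (non-zero exit code, message on stderr).
proc = subprocess.Popen(["git", "branch"],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
out, err = proc.communicate()
is_git_checkout = proc.returncode == 0
```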
12 changes: 4 additions & 8 deletions tests/test_getemails.py
@@ -9,7 +9,6 @@
 sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))

 from modules import pagereader, getemails
-from io import StringIO
 from modules.bcolors import Bcolors


@@ -19,15 +18,12 @@
 class getMailsTestCase(unittest.TestCase):

     def setUp(self):
-        self.held, sys.stdout = sys.stdout, StringIO()
         self.b_colors = Bcolors()

-    def test_print_emails(self):
-        data = ''.join(("\n", self.b_colors.OKGREEN, "Mails Found - ",
-                        self.b_colors.ENDC, "1\n------------------------",
-                        "-------\[email protected]\n"))
-        getemails.getMails(soup)
-        self.assertEqual(sys.stdout.getvalue(), data)
+    def test_getemails(self):
+        test_emails = ["[email protected]"]
+        emails = getemails.getMails(soup)
+        self.assertEqual(emails, test_emails)


 if __name__ == '__main__':
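The refactored test asserts on `getMails`'s return value instead of captured stdout, which keeps it independent of the `Bcolors` escape codes and of print formatting. Assuming the layout in this PR, the suite should run with standard unittest discovery from the repository root: `python3 -m unittest discover tests`.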
12 changes: 5 additions & 7 deletions tests/test_getweblinks.py
@@ -22,16 +22,14 @@ def setUp(self):
         self.held, sys.stdout = sys.stdout, StringIO()
         self.maxDiff = None

-    def test_print_links(self):
+    def test_get_links(self):

         data = ['http://aff.ironsocket.com/SH7L',
-                'http://aff.ironsocket.com/SH7L',
-                'http://wsrs.net/',
-                'http://cmsgear.com/',
-                'http://cmsgear.com/']
+                'http://aff.ironsocket.com/SH7L',
+                'http://wsrs.net/',
+                'http://cmsgear.com/']

-        ext = ['.com/']
-        result = getweblinks.getLinks(soup, ext)
+        result = getweblinks.getLinks(soup)
         self.assertEqual(result, data)
4 changes: 1 addition & 3 deletions tests/test_savetofile.py
@@ -27,10 +27,8 @@ def test_save_links(self):
         data = ['http://aff.ironsocket.com/SH7L',
                 'http://aff.ironsocket.com/SH7L',
                 'http://wsrs.net/',
-                'http://cmsgear.com/',
                 'http://cmsgear.com/']

-        ext = ['.com/']
-        result = getweblinks.getLinks(soup, ext, 0, 1)
+        result = getweblinks.getLinks(soup)
         self.assertEqual(result, data)